""" data importer utilities """ import clr clr.AddReference('System') clr.AddReference('System.Data') import System import System.Data as db import System.Data.SqlClient as db_client import sys import csv import os import os.path import shutil import re import datetime import gc import ImporterConst as const import ImporterLtann as ltann def _my_str(v): if isinstance(v, basestring): return v.encode('latin-1') else: return str(v) class DbQuery(object): def __init__(self, connection_string, parameter_prefix=''): self.parameterPrefix = parameter_prefix self.Connection = db_client.SqlConnection(connection_string) self.Connection.Open() self.Command = None self._prevSQL = '' def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self._closeCommand() self.Connection.Close() return False def _closeCommand(self): if self.Command is not None: self.Command = None self._prevSQL = '' def _openCommand(self, sql, kwargs): if sql != self._prevSQL: self._closeCommand() self.Command = db_client.SqlCommand(sql, self.Connection) self._prevSQL = sql self.Command.Parameters.Clear() for k, v in kwargs.iteritems(): self.Command.Parameters.AddWithValue(self.parameterPrefix + k, v) def query(self, sql, process=_my_str, **kwargs): self._openCommand(sql, kwargs) fields, data = [], [] with self.Command.ExecuteReader() as reader: n = reader.FieldCount fields = [reader.GetName(i) for i in xrange(0, n)] while(reader.Read()): row = {field: process(reader[i]) for i, field in enumerate(fields)} data.append(row) result = {'fields': fields, 'data': data} return result def execute(self, sql, **kwargs): self._openCommand(sql, kwargs) result = self.Command.ExecuteNonQuery() return result def dumpCsv(fileObj, dataDict, **kwargs): def _write(writer): fields = dataDict.get('fields', []) writer.writerow(fields) data = dataDict.get('data', []) for row in data: new_row = [row[name] for name in fields] writer.writerow(new_row) if isinstance(fileObj, str): with open(fileObj, kwargs.get('fileMode', 'wb')) as f: _write(csv.writer(f, **kwargs)) else: _write(csv.writer(fileObj, **kwargs)) def createTempTables(db_query): db_query.execute(const.SqlDestinationTables) def _forceRowTypes(v): if type(v) == long: if -sys.maxint < v <= sys.maxint: return int(v) else: raise ValueError('Int value overflow: {0}'.format(v)) else: return v def _processDataDict(d): result = {k: _forceRowTypes(v) for k, v in d.iteritems()} return result def storeDataDict(db_query, data_dict, table_name): sql = 'insert into {table_name} values({fields2})'.format( table_name=table_name, fields=', '.join(data_dict['fields']), fields2=', '.join(db_query.parameterPrefix + s for s in data_dict['fields'])) data = data_dict['data'] for row in data: new_row = _processDataDict(row) #print new_row db_query.execute(sql, **new_row) def getFieldIndex(fieldsWhat, fieldsWhere, resultType=dict): result = None if resultType is tuple: result = tuple(fieldsWhere.index(field) for field in fieldsWhat) elif resultType is dict: result = {field: fieldsWhere.index(field) for field in fieldsWhat} return result def readProcMap(csvName): """ returns a dict with keys as dept and process id """ result = {} reader = csv.reader(open(csvName, 'rb')) head_flag = True key_fields = ['Current Dept ID', 'Current Process ID'] data_header = [] key_header_map = {} for line in reader: if head_flag: data_header = line key_header_map = getFieldIndex(key_fields, line) head_flag = False else: key = tuple(line[key_header_map[field]] for field in key_fields) value = {name: line[i] for i, name in enumerate(data_header)} result[key] = value return result class FuncWrap(object): def __init__(self, func, threshold): super(FuncWrap, self).__init__() self.func = func self.threshold = threshold self.count = threshold def __call__(self, arg): result = self.func(arg) self.count -= 1 if self.count < 1: gc.collect() self.count = self.threshold return result def processDataDict(srcDict, rowProcessor): """ goes through the whole srcDict and applies rowProcessor to each row in srcDict['data'] rowProcessor - function(row: list, fields: list) """ fields = srcDict['fields'] data = srcDict['data'] f = FuncWrap(lambda row: rowProcessor(row, fields), const.GcThreshold) new_data = map(f, data) result = {'fields': fields, 'data': new_data} return result def processRowDeptProcess(row, fields, srcFields, dstFields, procMap): new_row = row.copy() k = tuple(row[name] for name in srcFields) info = procMap.get(k, None) if info is not None: for dst_field, name in zip(dstFields, ['New department', 'New sub-department', 'New process']): new_row[dst_field] = info[name] return new_row def transposeDataDict(srcDict, keyName): """ takes srcDict and returns real dict with keys made from values of keyName """ result = {} for row in srcDict['data']: key = row[keyName] result[key] = row.copy() return result _fsCache = {} def _updateFsCache(directory, processor): _fsCache[directory] = {} root = _fsCache[directory] for dirpath, dirnames, filenames in os.walk(directory): for filename in map(processor, filenames): tmp = root.get(filename, []) tmp.append(processor(dirpath)) root[filename] = tmp def findFileInRoot(directory, filename, processor=str.lower, pathHint=''): result = '' if directory not in _fsCache: _updateFsCache(directory, processor) processed_filename = processor(os.path.split(filename.strip())[1]) root = _fsCache[directory] if processed_filename in root: path_list = root[processed_filename] if pathHint != '' and pathHint in path_list: result = os.path.join(pathHint, processed_filename) else: result = os.path.join(path_list[0], processed_filename) return result def copyFile(src, directory, checkAnn=False, orgWidth=0, orgHeight=0, partId='', dstExists=None, doCopy=const.DoFileCopy): result = '' result_size = 0 if dstExists is None: dstExists = lambda d: None if os.path.exists(src): file_name = os.path.split(src)[1] file_name_wo_ext, ext = os.path.splitext(file_name) if partId != '': file_name = ''.join([file_name_wo_ext, '-', str(partId), ext]) dst_dir_local = os.path.join('', *[file_name_wo_ext[i:j] for i, j in const.FileCopyPattern]) dst_dir = os.path.join(directory, dst_dir_local) if not os.path.exists(dst_dir): os.makedirs(dst_dir) dst_local = os.path.join(dst_dir_local, file_name) dst = os.path.join(dst_dir, file_name) if os.path.exists(dst): dstExists(dst) if doCopy: shutil.copy(src, dst) if checkAnn and False: ann_file = ltann.annFileExists(src) if ann_file != '': try: ltann.convertAnnToVector(ann_file, src, dst) except Exception as exc: msg = 'ERROR: {msg} (with {src}, {ann}, {dst}) ({exc})'.format(msg=exc.message, src=src, ann=ann_file, dst=dst, exc=repr(exc)) dumpString(msg) result = dst_local result_size = os.stat(dst).st_size if os.path.exists(dst) else (os.stat(src).st_size if os.path.exists(src) else 0) return (result, result_size) def loadFileList(dbQuery, processor=str.lower): directory = const.ImageSourceDir if directory not in _fsCache: _updateFsCache(directory, processor) dbQuery.execute(const.SqlTempTable) for filename, paths in _fsCache[directory].iteritems(): duplicate = 1 if len(paths) > 1 else 0 for path in paths: dbQuery.execute(const.SqlTempInsert, filename=filename, filepath=path, duplicate=duplicate) _floatRe = re.compile('[+-]?\\d*\\.?\\d+') def parseFloat(string): """ parses float from string; attempts to mimic javascript function parseFloat returns string, not float """ match = _floatRe.search(string) if match is None: return '' else: return match.group() def dumpString(s, **kwargs): now = datetime.datetime.now() result = '{dt}: {msg}'.format(dt=now.strftime('%Y-%m-%d %H:%M:%S'), msg=s.format(**kwargs)) print result return True ############################################## # const.FormControlMap add-ons _controlEmpty = lambda r, f, **kw: r _dataEmpty = _controlEmpty def _controlCode1(r, f, **kw): result = r.copy() result['width'] = -1 result['height'] = -1 return result def _dataCode1(r, f, **kw): result = r.copy() result['orgWidth'] = -1 result['orgHeight'] = -1 return result def _controlRadioGroup(r, f, **kw): result = r.copy() options = result['title'].partition(',') result['title'] = options[0] result['defaultValues'] = options[2] return result def _controlLegend(r, f, **kw): result = r.copy() imagename = findFileInRoot(const.ImageSourceDir, result['title']) imagesize = 0 if imagename != '': do_copy = False # !!!! TEMPORARY imagename, imagesize = copyFile(imagename, kw['filesDir'], dstExists=kw.get('dstExists', None)) result['defaultValues'] = imagename result['defaultValuesSize'] = imagesize result['title'] = '' return result def _dataImage(r, f, **kw): result = r.copy() imagename, imagesize = '', 0 if result['Dept1'] != '' and result['Dept2'] != '' and result['Process'] != '': imagename = findFileInRoot(const.ImageSourceDir, result['resultValue'], pathHint=result['pathHint']) if imagename != '': params = {name: result[name] for name in ['partId', 'orgWidth', 'orgHeight']} params['checkAnn'] = kw.get('checkAnn', False) params['dstExists'] = kw.get('dstExists', None) imagename, imagesize = copyFile(imagename, kw['filesDir'], **params) result['fileExists'] = 1 else: imagename = result['resultValue'] result['fileExists'] = 0 result['resultValue'] = imagename result['fileSize'] = imagesize return result _FormControlMap = { '1': { 'processRow': _controlCode1, 'processData': _dataCode1 }, # image '2': { 'processRow': _controlEmpty, 'processData': _dataImage }, # image '3': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # memo/textarea '4': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # edit/input '5': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # checkbox '6': { 'processRow': _controlRadioGroup, 'processData': _dataEmpty }, # radiogroup '7': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # label #'8': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # bevel '9': { 'processRow': _controlLegend, 'processData': _dataEmpty }, # legend '10': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # notebox - ? '11': { 'processRow': _controlEmpty, 'processData': _dataImage }, # video '12': { 'processRow': _controlEmpty, 'processData': _dataImage }, # drawing - ? '16': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # label for header #'18': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # QA button - ? #'20': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # QA grid (1) - N/A #'21': { 'processRow': _controlEmpty, 'processData': _dataEmpty }, # richtext (1) - N/A } for k, v in _FormControlMap.iteritems(): const.FormControlMap[k].update(v) const.FormControlEmpty['processRow'] = _controlEmpty const.FormControlEmpty['processData'] = _dataEmpty