Source code for neural_pipeline.utils.file_structure_manager

"""
This module contains all classes that work with the file structure

* :class:`FileStructManager` provides directory registration for all modules
* :class:`CheckpointsManager` provides checkpoints management
"""

import os
from abc import ABCMeta, abstractmethod
from zipfile import ZipFile

# Names exported by ``from <this module> import *``.
__all__ = ['FileStructManager', 'CheckpointsManager', 'FolderRegistrable']


class FolderRegistrable(metaclass=ABCMeta):
    """
    Abstract base for classes that own a directory inside the file structure.

    Subclasses must implement the constructor, :meth:`_get_gir` and :meth:`_get_name`.

    :param fsm: :class:`FileStructManager` instance
    """

    @abstractmethod
    def __init__(self, fsm: 'FileStructManager'):
        pass

    @abstractmethod
    def _get_gir(self) -> str:
        """
        Get directory path to register

        :return: path
        """

    @abstractmethod
    def _get_name(self) -> str:
        """
        Get name of registrable object

        :return: name
        """
class CheckpointsManager(FolderRegistrable):
    """
    Class that manages checkpoints for DataProcessor.

    All states are packed into a zip file. It contains a few files: model weights, optimizer state
    and trainer state.

    :param fsm: :class:`FileStructManager` instance
    :param prefix: prefix of saved and loaded files. If ``None``, ``'last'`` is used
    :raises SMException: if ``prefix`` is ``None`` and the checkpoints directory doesn't exist
    """

    class SMException(Exception):
        """
        Exception for :class:`CheckpointsManager`

        :param message: error description
        """

        def __init__(self, message: str):
            # Forward the message to ``Exception`` so ``args``, ``repr`` and pickling carry it
            super().__init__(message)
            self.__message = message

        def __str__(self):
            return self.__message

    def __init__(self, fsm: 'FileStructManager', prefix: str = None):
        super().__init__(fsm)

        self._prefix = prefix if prefix is not None else 'last'
        fsm.register_dir(self)
        self._checkpoints_dir = fsm.get_path(self, create_if_non_exists=True, check=False)

        if prefix is None and not os.path.isdir(self._checkpoints_dir):
            raise self.SMException("Checkpoints dir doesn't exists: [{}]".format(self._checkpoints_dir))

        self._weights_file = os.path.join(self._checkpoints_dir, 'weights.pth')
        self._state_file = os.path.join(self._checkpoints_dir, 'state.pth')
        self._checkpoint_file = self._compile_path(self._checkpoints_dir, 'checkpoint.zip')
        self._trainer_file = os.path.join(self._checkpoints_dir, 'trainer.json')

        # Unpacked weights and state files found outside of continue mode are packed right away
        if not fsm.in_continue_mode() and os.path.isfile(self._weights_file) and os.path.isfile(self._state_file):
            # NOTE(review): the temporary prefix does not change the archive name, because
            # ``self._checkpoint_file`` is already computed above - confirm the intended target file.
            prev_prefix = self._prefix
            self._prefix = "prev_start"
            self.pack()
            self._prefix = prev_prefix

    def unpack(self) -> None:
        """
        Unpack state files from the checkpoint archive to the checkpoints directory

        :raises SMException: if any of the weights, optimizer state or trainer files is missing after extraction
        """
        with ZipFile(self._checkpoint_file, 'r') as zipfile:
            zipfile.extractall(self._checkpoints_dir)

        self._check_files([self._weights_file, self._state_file, self._trainer_file])

    def clear_files(self) -> None:
        """
        Clear unpacked files (weights, optimizer state and trainer state)
        """
        for file in (self._weights_file, self._state_file, self._trainer_file):
            self._remove_if_file(file)

    def pack(self) -> None:
        """
        Pack weights, optimizer state and trainer state files to the zip archive.

        An existing archive is kept with the ``.old`` suffix. Unpacked files are removed after packing.

        :raises SMException: if any of the files to pack doesn't exist
        """
        files = [self._weights_file, self._state_file, self._trainer_file]
        # Validate everything that will be written before touching the previous archive,
        # so a missing file can't leave a truncated checkpoint behind
        self._check_files(files)

        backup_file = self._checkpoint_file + ".old"
        self._remove_if_file(backup_file)
        if os.path.isfile(self._checkpoint_file):
            os.rename(self._checkpoint_file, backup_file)

        with ZipFile(self._checkpoint_file, 'w') as zipfile:
            for file in files:
                zipfile.write(file, os.path.basename(file))

        self.clear_files()

    def optimizer_state_file(self) -> str:
        """
        Get optimizer state file path

        :return: path
        """
        return self._state_file

    def weights_file(self) -> str:
        """
        Get model weights file path

        :return: path
        """
        return self._weights_file

    def trainer_file(self) -> str:
        """
        Get trainer state file path

        :return: path
        """
        return self._trainer_file

    @staticmethod
    def _remove_if_file(file: str) -> None:
        """
        Internal method that removes path only if it's an existing regular file

        :param file: path to remove
        """
        if os.path.isfile(file):
            os.remove(file)

    def _compile_path(self, directory: str, file: str) -> str:
        """
        Internal method for compile result file name

        :param directory: directory of the result file
        :param file: file name, that will be prepended by ``<prefix>_``
        :return: path to result file
        """
        return os.path.join(directory, (self._prefix + "_" if self._prefix is not None else "") + file)

    def _check_files(self, files) -> None:
        """
        Internal method for checking files for condition of existing

        :param files: list of files paths to check
        :raises: SMException
        """
        failed = [f for f in files if not os.path.isfile(f)]
        if failed:
            # Report only the files that are actually missing
            raise self.SMException("Some files doesn't exists: [{}]".format(';'.join(failed)))

    def _get_gir(self) -> str:
        return os.path.join('checkpoints', self._prefix)

    def _get_name(self) -> str:
        return 'CheckpointsManager' + self._prefix
class FileStructManager:
    """
    Class that provides directories registration in the base directory.

    All modules that use file structure under the base directory should register their paths in this class
    by passing the module to method :meth:`register_dir`.
    If the directory is already registered, the registration method raises :class:`FSMException`

    :param base_dir: path to the base directory; registered directories are located relative to it
    :param is_continue: is `FileStructManager` used for continue training or predict
    :param exists_ok: if `True` - all checks for existing directories will be disabled
    """

    class FSMException(Exception):
        """
        Exception for :class:`FileStructManager`

        :param message: error description
        """

        def __init__(self, message: str):
            # Forward the message to ``Exception`` so ``args``, ``repr`` and pickling carry it
            super().__init__(message)
            self.__message = message

        def __str__(self):
            return self.__message

    class _Folder:
        """
        Internal class, that implements logic for single registrable directory

        :param path: path to directory
        :param fsm: :class:`FileStructManager` object
        """

        def __init__(self, path: str, fsm: 'FileStructManager'):
            self._path = path
            self._fsm = fsm

            self._path_first_request = True

        def get_path_for_check(self) -> str:
            """
            Get folder path without any checking for existing

            :return: path
            """
            return self._path

        def _create_directories(self) -> None:
            """
            Internal method that creates the directory if it doesn't exist and FileStructManager
            is not in continue mode
            """
            if self._fsm._is_continue:
                return
            if not os.path.isdir(self._path):
                os.makedirs(self._path, exist_ok=True)

        def get_path(self, create_if_non_exists: bool = True) -> str:
            """
            Get folder path. This method creates the directory if it doesn't exist
            (if param ``create_if_non_exists == True``). Creation is attempted only once per folder.

            :param create_if_non_exists: is need to create directory if it doesn't exist
            :return: directory path
            """
            if create_if_non_exists and self._path_first_request:
                self._create_directories()
                self._path_first_request = False
            return self._path

        def check_path(self) -> None:
            """
            Check that directory doesn't contain any files

            :raises: :class:`FileStructManager.FSMException`
            """
            if os.path.isdir(self._path) and os.listdir(self._path):
                raise self._fsm.FSMException("Checkpoint directory already exists [{}]".format(self._path))

    def __init__(self, base_dir: str, is_continue: bool, exists_ok: bool = False):
        self._dirs = {}
        self._is_continue = is_continue
        self._base_dir = base_dir
        self._exist_ok = exists_ok

    def register_dir(self, obj: 'FolderRegistrable', check_name_registered: bool = True,
                     check_dir_registered: bool = True) -> None:
        """
        Register directory in file structure

        :param obj: object to registration
        :param check_name_registered: is need to check if object name is already registered
        :param check_dir_registered: is need to check if object path is already registered
        :raise FSMException: if path or object name is already registered, or if the directory already exists
            and isn't empty (depending on optional parameters values and manager mode)
        """
        name = obj._get_name()
        path = os.path.join(self._base_dir, obj._get_gir())

        if check_dir_registered and any(folder.get_path_for_check() == path for folder in self._dirs.values()):
            raise self.FSMException("Path {} already registered!".format(path))

        if check_name_registered and name in self._dirs:
            raise self.FSMException("Object {} already registered!".format(name))

        folder = self._Folder(path, self)
        self._dirs[name] = folder
        # The folder stays registered even if the following check fails
        if not self._exist_ok and not self._is_continue:
            folder.check_path()

    def get_path(self, obj: 'FolderRegistrable', create_if_non_exists: bool = False, check: bool = True) -> str:
        """
        Get path of registered object

        :param obj: object
        :param create_if_non_exists: is need to create object's directory if it doesn't exist
        :param check: is need to check object's directory existing
        :return: path to directory
        :raise FSMException: if directory exists, isn't empty and ``check == True``
        :raise KeyError: if object wasn't registered
        """
        folder = self._dirs[obj._get_name()]
        if not self._exist_ok and not self._is_continue and check:
            folder.check_path()
        return folder.get_path(create_if_non_exists)

    def in_continue_mode(self) -> bool:
        """
        Is FileStructManager in continue mode

        :return: True if in continue
        """
        return self._is_continue