TwinkleModel
TwinkleModel是twinkle所有模型的基类。twinkle的模型不单单包含了模型本身,也包含了模型配套的训练组件。我们在其他文档中介绍的组件基本均在这里进行组合使用。
任何模型符合TwinkleModel的基类设定均可以配合框架的其他组件使用:
class TwinkleModel(ABC):
@abstractmethod
def forward(self, *, inputs: Dict[str, Any], **kwargs):
# 进行一次forward,并返回logits
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def forward_only(self, *, inputs: Dict[str, Any], **kwargs):
# 以推理模式进行一次forward,并返回logits
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def calculate_loss(self, **kwargs):
# 使用Loss的子类完成loss计算
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def backward(self, **kwargs):
# 进行一次backward
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def forward_backward(self, *, inputs: Dict[str, Any], **kwargs):
# 组合了forward、loss计算、backward过程,并返回loss值
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def clip_grad_norm(self, max_grad_norm: float = 1.0, norm_type=2, **kwargs):
# 梯度裁剪,发生在gradient_accumulation_steps完成的条件下,可以在kwargs传入gradient_accumulation_steps
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def step(self, **kwargs):
# 梯度更新,发生在gradient_accumulation_steps完成的条件下,可以在kwargs传入gradient_accumulation_steps
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def zero_grad(self, **kwargs):
# 梯度清理,发生在gradient_accumulation_steps完成的条件下,可以在kwargs传入gradient_accumulation_steps
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def lr_step(self, **kwargs):
# lr更新,发生在gradient_accumulation_steps完成的条件下,可以在kwargs传入gradient_accumulation_steps
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def clip_grad_and_step(self, max_grad_norm: float=1.0, norm_type=2, **kwargs):
# 组合了clip、step、zero_grad、lr_step
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def set_loss(self, loss_cls: Union[Loss, Type[Loss], str, Callable[[InputFeature, ModelOutput, ...], torch.Tensor]], **kwargs):
# 设置loss
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def set_optimizer(self, optimizer_cls: Union[Optimizer, Type[Optimizer], str], **kwargs):
# 设置optimizer
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def set_lr_scheduler(self, scheduler_cls: Union[LRScheduler, Type[LRScheduler], str], **kwargs):
# 设置lr_scheduler
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def save(self, name: str, output_dir: Optional[str] = None, **kwargs):
# 保存checkpoint
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def load(self, name: str, output_dir: Optional[str] = None, **kwargs):
# 加载checkpoint
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def get_state_dict(self, **kwargs):
# 获取state_dict
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def apply_patch(self, patch_cls: Union[Patch, Type[Patch], str], **kwargs):
# 对模型应用一个补丁
@abstractmethod
def add_metric(self, metric_cls: Union[Metric, str], is_training, **kwargs):
# 增加一个训练指标,可以设置is_training参数,代表在forward/forward_only中累加。如果不设置,则对forward/forward_only分别生效
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def calculate_metric(self, is_training: bool, **kwargs):
# 计算metric并返回
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def add_adapter_to_model(self, adapter_name: str, config_or_dir, **kwargs):
# 增加一个lora
@abstractmethod
def set_template(self, template_cls: Union[Template, Type[Template], str], **kwargs):
# 设置template
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def set_processor(self, processor_cls: Union[InputProcessor, Type[InputProcessor], str], **kwargs):
# 设置任务数据处理
# 支持adapter_name参数,对某个lora生效
@abstractmethod
def get_train_configs(self, **kwargs) -> str:
# 获取模型训练配置,用于打印
# 支持adapter_name参数,对某个lora生效