# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import argparse
from nlp_architect.models.transformers.base_model import get_models
def create_base_args(parser: argparse.ArgumentParser, model_types=None):
    """Add base arguments for Transformer-based models
    """
    # Required parameters
    if model_types is not None and len(model_types) > 1:
        parser.add_argument("--model_type", default=None, type=str,
                            choices=model_types,
                            required=True,
                            help="Model type selected from the list: "
                            + ", ".join(model_types))
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions and checkpoints "
                             "will be written.")
    # Other parameters
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path, if not the same as model_name")
    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="The maximum total input sequence length after tokenization. "
                             "Sequences longer than this will be truncated; shorter sequences "
                             "will be padded.")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Directory in which to store the pre-trained models downloaded "
                             "from S3.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA even when it is available.")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory.")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets.")
def inference_args(parser: argparse.ArgumentParser):
    """Add inference-specific arguments for Transformer-based models
    """
    parser.add_argument("--model_path", default=None, type=str, required=True,
                        help="Path to pre-trained model")
    parser.add_argument("--load_quantized_model", action="store_true",
                        help="Load and run inference from a saved quantized model; a "
                             "'quant_pytorch_model.bin' file must exist in the directory "
                             "and the model type must be 'quant_<model>'")
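

# Illustrative sketch (an assumption, not provided by the library): an inference
# CLI typically combines the base and inference argument groups on one parser.
#
#     parser = argparse.ArgumentParser(description="run inference")
#     create_base_args(parser)
#     inference_args(parser)
#     args = parser.parse_args(["--output_dir", "out/", "--model_path", "model/"])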
def train_args(parser: argparse.ArgumentParser, models_family=None):
    """Add training-specific arguments for Transformer-based models
    """
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected from the list: "
                        + ", ".join(get_models(models_family)))
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path, if not the same as model_name")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Run evaluation during training at each logging step.")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a "
                             "backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay to apply, if any.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for the Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set the total number of training steps to perform. "
                             "Overrides num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X update steps.")
    parser.add_argument('--save_steps', type=int, default=500,
                        help="Save a checkpoint every X update steps.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as "
                             "model_name and ending with a step number")
    parser.add_argument('--seed', type=int, default=42,
                        help="Random seed for initialization")