Skip to content

Commit d5f1b99

Browse files
committed
Release v0.1.6
Former-commit-id: 43c8b3c3c8bfb2e32d17fb3e8b194938e37d54bd
1 parent 2144bb0 commit d5f1b99

18 files changed

Lines changed: 127 additions & 41 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
| [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern |
5656
| [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml |
5757
| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | - |
58+
| [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | query_key_value | chatglm2 |
5859

5960
- **Default module** is used for the `--lora_target` argument. Please use `python src/train_bash.py -h` to see all available options.
6061
- For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna`, etc. For the "chat" models, however, make sure to use the corresponding template.
@@ -408,6 +409,8 @@ Please follow the model licenses to use the corresponding model weights:
408409
- [Baichuan](https://huggingface.co/baichuan-inc/baichuan-7B/resolve/main/baichuan-7B%20%E6%A8%A1%E5%9E%8B%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf)
409410
- [InternLM](https://github.com/InternLM/InternLM#open-source-license)
410411
- [Qwen](https://huggingface.co/Qwen/Qwen-7B-Chat/blob/main/LICENSE)
412+
- [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf)
413+
- [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B/blob/main/MODEL_LICENSE)
411414

412415
## Citation
413416

src/llmtuner/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
from llmtuner.webui import create_ui, create_web_demo
77

88

9-
__version__ = "0.1.5"
9+
__version__ = "0.1.6"

src/llmtuner/dsets/loader.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,13 @@ def get_dataset(
9393
dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name)
9494

9595
if dataset_attr.source_prefix: # add prefix
96-
features = None
9796
if data_args.streaming:
9897
features = dataset.features
9998
features["prefix"] = Value(dtype="string", id=None)
100-
dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix}, features=features)
99+
dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix}, features=features)
100+
else:
101+
prefix_data = [dataset_attr.source_prefix] * len(dataset)
102+
dataset = dataset.add_column("prefix", prefix_data)
101103

102104
all_datasets.append(dataset)
103105

src/llmtuner/dsets/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ def split_dataset(
1919
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
2020
return {"train_dataset": train_set, "eval_dataset": val_set}
2121
else:
22-
dataset = dataset.train_test_split(test_size=data_args.val_size, seed=training_args.seed)
22+
val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size
23+
dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed)
2324
return {"train_dataset": dataset["train"], "eval_dataset": dataset["test"]}
2425
else:
2526
if data_args.streaming:

src/llmtuner/extras/constants.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@
3737
"InternLM-7B": "internlm/internlm-7b",
3838
"InternLM-7B-Chat": "internlm/internlm-chat-7b",
3939
"Qwen-7B": "Qwen/Qwen-7B",
40-
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
40+
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
41+
"XVERSE-13B": "xverse/XVERSE-13B",
42+
"ChatGLM2-6B": "THUDM/chatglm2-6b"
4143
}
4244

4345
DEFAULT_MODULE = {
@@ -48,5 +50,7 @@
4850
"Falcon": "query_key_value",
4951
"Baichuan": "W_pack",
5052
"InternLM": "q_proj,v_proj",
51-
"Qwen": "c_attn"
53+
"Qwen": "c_attn",
54+
"XVERSE": "q_proj,v_proj",
55+
"ChatGLM2": "query_key_value"
5256
}

src/llmtuner/extras/template.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def register_template(
178178
stop_words: List[str],
179179
use_history: bool
180180
) -> None:
181-
template_class = Llama2Template if name == "llama2" else Template
181+
template_class = Llama2Template if "llama2" in name else Template
182182
templates[name] = template_class(
183183
prefix=prefix,
184184
prompt=prompt,
@@ -272,6 +272,23 @@ def get_template_and_fix_tokenizer(
272272
)
273273

274274

275+
r"""
276+
Supports: https://github.com/ymcui/Chinese-LLaMA-Alpaca-2
277+
"""
278+
register_template(
279+
name="llama2_zh",
280+
prefix=[
281+
"<<SYS>>\nYou are a helpful assistant. 你是一个乐于助人的助手。\n<</SYS>>\n\n"
282+
],
283+
prompt=[
284+
"[INST] {{query}} [/INST] "
285+
],
286+
sep=[],
287+
stop_words=[],
288+
use_history=True
289+
)
290+
291+
275292
r"""
276293
Supports: https://huggingface.co/tatsu-lab/alpaca-7b-wdiff
277294
https://github.com/ymcui/Chinese-LLaMA-Alpaca

src/llmtuner/hparams/finetuning_args.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ class FinetuningArguments:
5757
Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \
5858
LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
5959
)
60+
resume_lora_training: Optional[bool] = field(
61+
default=True,
62+
metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
63+
)
6064
dpo_beta: Optional[float] = field(
6165
default=0.1,
6266
metadata={"help": "The beta parameter for the DPO loss."}

src/llmtuner/hparams/model_args.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,6 @@ class ModelArguments:
5555
default=None,
5656
metadata={"help": "Path to the directory containing the checkpoints of the reward model."}
5757
)
58-
resume_lora_training: Optional[bool] = field(
59-
default=True,
60-
metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
61-
)
6258
plot_loss: Optional[bool] = field(
6359
default=False,
6460
metadata={"help": "Whether to plot the training loss after fine-tuning or not."}

src/llmtuner/tuner/core/adapter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def init_adapter(
6565
assert os.path.exists(os.path.join(model_args.checkpoint_dir[0], CONFIG_NAME)), \
6666
"The given checkpoint may be not a LoRA checkpoint, please specify `--finetuning_type full/freeze` instead."
6767

68-
if (is_trainable and model_args.resume_lora_training) or (not is_mergeable): # continually train on the lora weights
68+
if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable): # continually fine-tuning
6969
checkpoints_to_merge, latest_checkpoint = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
7070
else:
7171
checkpoints_to_merge = model_args.checkpoint_dir

src/llmtuner/tuner/tune.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None):
2020
model_args, data_args, training_args, finetuning_args, generating_args, general_args = get_train_args(args)
21-
callbacks = [LogCallback()] if callbacks is None else callbacks + [LogCallback()]
21+
callbacks = [LogCallback()] if callbacks is None else callbacks
2222

2323
if general_args.stage == "pt":
2424
run_pt(model_args, data_args, training_args, finetuning_args, callbacks)

0 commit comments

Comments
 (0)