{
"status": "0",
"data": {
"lines": [
"2025-07-28 00:39:55.769 [INFO] * Starting OpenBSD Secure Shell server sshd",
"2025-07-28 00:39:55.786 [INFO] ...done.",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/condabin/conda",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/bin/conda",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/bin/conda-env",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/bin/activate",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/bin/deactivate",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/etc/profile.d/conda.sh",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/etc/fish/conf.d/conda.fish",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/shell/condabin/Conda.psm1",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/shell/condabin/conda-hook.ps1",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/lib/python3.11/site-packages/xontrib/conda.xsh",
"2025-07-28 00:39:56.245 [INFO] no change /opt/conda/etc/profile.d/conda.csh",
"2025-07-28 00:39:56.245 [INFO] modified /root/.bashrc",
"2025-07-28 00:39:56.245 [INFO] ",
"2025-07-28 00:39:56.245 [INFO] ==> For changes to take effect, close and re-open your current shell. <==",
"2025-07-28 00:39:56.245 [INFO] ",
"2025-07-28 00:39:56.291 [INFO] 2025-07-28 00:39:56: Checking /workspace directory structure...",
"2025-07-28 00:39:56.294 [INFO] 2025-07-28 00:39:56: MODELS: cp /shared-only/llamafactory_src/constants.py to /LLaMA-Factory/constants.py",
"2025-07-28 00:39:56.322 [INFO] 2025-07-28 00:39:56: MODELS: CP constants.py done",
"2025-07-28 00:39:56.326 [INFO] 2025-07-28 00:39:56: Starting create rclone.conf...",
"2025-07-28 00:39:56.334 [INFO] 2025-07-28 00:39:56: Created rclone.conf successfully.",
"2025-07-28 00:39:56.343 [INFO] 2025-07-28 00:39:56: create /workspace/tmp successfully.",
"2025-07-28 00:39:56.351 [INFO] 2025-07-28 00:39:56: Copying /shared-only/datasets/dataset_info.json to /workspace/llamafactory/data/dataset_info.json",
"2025-07-28 00:39:56.385 [INFO] 2025-07-28 00:39:56: Copy completed.",
"2025-07-28 00:39:56.388 [INFO] 2025-07-28 00:39:56: Sourcing conda.sh...",
"2025-07-28 00:39:56.392 [INFO] 2025-07-28 00:39:56: Activating conda environment...",
"2025-07-28 00:39:56.823 [INFO] 2025-07-28 00:39:56: Conda environment activated successfully.",
"2025-07-28 00:39:56.824 [INFO] 传递的参数:llamafactory-cli train --stage sft --do_train True --model_name_or_path /shared-only/models/Qwen/Qwen3-8B --preprocessing_num_workers 16 --finetuning_type freeze --template qwen3 --flash_attn auto --dataset_dir /workspace/llamafactory/data --dataset alpaca_zh_demo --cutoff_len 2048 --learning_rate 5e-05 --num_train_epochs 3.0 --max_samples 100000 --per_device_train_batch_size 2 --gradient_accumulation_steps 8 --lr_scheduler_type cosine --max_grad_norm 1.0 --logging_steps 5 --save_steps 100 --warmup_steps 0 --packing False --enable_thinking True --report_to none --use_swanlab True --output_dir /workspace/llamafactory/output_dir/Qwen3-8B-Instruct/lora/train_2025-07-26-13-42-58 --bf16 True --plot_loss True --trust_remote_code True --ddp_timeout 180000000 --include_num_input_tokens_seen True --optim adamw_torch --lora_rank 8 --lora_alpha 16 --lora_dropout 0 --lora_target all --swanlab_project llamafactory --swanlab_run_name schedule-00 --swanlab_api_key pTU7UrqHR1QjlIqWW5EXG --swanlab_mode cloud --val_size 0.1 --eval_strategy steps",
"2025-07-28 00:40:07.799 [INFO] [2025-07-28 00:40:07,799] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)",
"2025-07-28 00:40:07.875 [ERROR] df: /root/.triton/autotune: No such file or directory",
"2025-07-28 00:40:09.256 [INFO] INFO 07-28 00:40:09 [__init__.py:244] Automatically detected platform cuda.",
"2025-07-28 00:40:13.927 [INFO] [INFO|2025-07-28 00:40:13] llamafactory.hparams.parser:406 >> Process rank: 0, world size: 1, device: cuda:0, distributed training: False, compute dtype: torch.bfloat16",
"2025-07-28 00:40:14.080 [INFO] ",
"2025-07-28 00:40:14.581 [INFO] \u001b[1m\u001b[34mswanlab\u001b[0m\u001b[0m: \\ Waiting for the swanlab cloud response.",
"2025-07-28 00:40:14.582 [INFO] ",
"2025-07-28 00:40:14.613 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file vocab.json",
"2025-07-28 00:40:14.613 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file merges.txt",
"2025-07-28 00:40:14.614 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file tokenizer.json",
"2025-07-28 00:40:14.614 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file added_tokens.json",
"2025-07-28 00:40:14.614 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file special_tokens_map.json",
"2025-07-28 00:40:14.614 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,613 >> loading file tokenizer_config.json",
"2025-07-28 00:40:14.614 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:14,614 >> loading file chat_template.jinja",
"2025-07-28 00:40:15.148 [ERROR] [INFO|tokenization_utils_base.py:2299] 2025-07-28 00:40:15,148 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.",
"2025-07-28 00:40:15.169 [ERROR] [INFO|configuration_utils.py:696] 2025-07-28 00:40:15,169 >> loading configuration file /shared-only/models/Qwen/Qwen3-8B/config.json",
"2025-07-28 00:40:15.173 [ERROR] [INFO|configuration_utils.py:770] 2025-07-28 00:40:15,172 >> Model config Qwen3Config {",
"2025-07-28 00:40:15.173 [ERROR] \"architectures\": [",
"2025-07-28 00:40:15.173 [ERROR] \"Qwen3ForCausalLM\"",
"2025-07-28 00:40:15.173 [ERROR] ],",
"2025-07-28 00:40:15.173 [ERROR] \"attention_bias\": false,",
"2025-07-28 00:40:15.173 [ERROR] \"attention_dropout\": 0.0,",
"2025-07-28 00:40:15.173 [ERROR] \"bos_token_id\": 151643,",
"2025-07-28 00:40:15.173 [ERROR] \"eos_token_id\": 151645,",
"2025-07-28 00:40:15.173 [ERROR] \"head_dim\": 128,",
"2025-07-28 00:40:15.173 [ERROR] \"hidden_act\": \"silu\",",
"2025-07-28 00:40:15.173 [ERROR] \"hidden_size\": 4096,",
"2025-07-28 00:40:15.173 [ERROR] \"initializer_range\": 0.02,",
"2025-07-28 00:40:15.173 [ERROR] \"intermediate_size\": 12288,",
"2025-07-28 00:40:15.173 [ERROR] \"max_position_embeddings\": 40960,",
"2025-07-28 00:40:15.173 [ERROR] \"max_window_layers\": 36,",
"2025-07-28 00:40:15.173 [ERROR] \"model_type\": \"qwen3\",",
"2025-07-28 00:40:15.173 [ERROR] \"num_attention_heads\": 32,",
"2025-07-28 00:40:15.173 [ERROR] \"num_hidden_layers\": 36,",
"2025-07-28 00:40:15.173 [ERROR] \"num_key_value_heads\": 8,",
"2025-07-28 00:40:15.173 [ERROR] \"rms_norm_eps\": 1e-06,",
"2025-07-28 00:40:15.173 [ERROR] \"rope_scaling\": null,",
"2025-07-28 00:40:15.173 [ERROR] \"rope_theta\": 1000000,",
"2025-07-28 00:40:15.173 [ERROR] \"sliding_window\": null,",
"2025-07-28 00:40:15.173 [ERROR] \"tie_word_embeddings\": false,",
"2025-07-28 00:40:15.173 [ERROR] \"torch_dtype\": \"bfloat16\",",
"2025-07-28 00:40:15.173 [ERROR] \"transformers_version\": \"4.52.4\",",
"2025-07-28 00:40:15.173 [ERROR] \"use_cache\": true,",
"2025-07-28 00:40:15.173 [ERROR] \"use_sliding_window\": false,",
"2025-07-28 00:40:15.173 [ERROR] \"vocab_size\": 151936",
"2025-07-28 00:40:15.173 [ERROR] }",
"2025-07-28 00:40:15.173 [ERROR] ",
"2025-07-28 00:40:15.173 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file vocab.json",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file merges.txt",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file tokenizer.json",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file added_tokens.json",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file special_tokens_map.json",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file tokenizer_config.json",
"2025-07-28 00:40:15.174 [ERROR] [INFO|tokenization_utils_base.py:2021] 2025-07-28 00:40:15,173 >> loading file chat_template.jinja",
"2025-07-28 00:40:15.598 [ERROR] [INFO|tokenization_utils_base.py:2299] 2025-07-28 00:40:15,597 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.",
"2025-07-28 00:40:15.625 [INFO] [INFO|2025-07-28 00:40:15] llamafactory.data.loader:143 >> Loading dataset /shared-only/datasets/alpaca_zh_demo.json...",
"2025-07-28 00:40:16.575 [ERROR] Setting num_proc from 16 back to 1 for the train split to disable multiprocessing as it only contains one shard.",
"2025-07-28 00:40:16.575 [ERROR] WARNING:datasets.builder:Setting num_proc from 16 back to 1 for the train split to disable multiprocessing as it only contains one shard.",
"2025-07-28 00:40:16.575 [ERROR] ",
"2025-07-28 00:40:16.607 [ERROR] Generating train split: 0 examples [00:00, ? examples/s]",
"2025-07-28 00:40:16.607 [ERROR] Generating train split: 1000 examples [00:00, 31771.42 examples/s]",
"2025-07-28 00:40:16.821 [ERROR] ",
"2025-07-28 00:40:16.942 [ERROR] Converting format of dataset (num_proc=16): 0%| | 0/1000 [00:00<?, ? examples/s]",
"2025-07-28 00:40:17.044 [ERROR] Converting format of dataset (num_proc=16): 13%|█▎ | 126/1000 [00:00<00:00, 1049.68 examples/s]",
"2025-07-28 00:40:17.207 [ERROR] Converting format of dataset (num_proc=16): 50%|█████ | 502/1000 [00:00<00:00, 2494.65 examples/s]",
"2025-07-28 00:40:17.207 [ERROR] Converting format of dataset (num_proc=16): 100%|██████████| 1000/1000 [00:00<00:00, 2595.17 examples/s]",
"2025-07-28 00:40:17.610 [ERROR] ",
"2025-07-28 00:40:18.341 [ERROR] Running tokenizer on dataset (num_proc=16): 0%| | 0/1000 [00:00<?, ? examples/s]",
"2025-07-28 00:40:18.783 [ERROR] Running tokenizer on dataset (num_proc=16): 6%|▋ | 63/1000 [00:00<00:10, 86.22 examples/s]",
"2025-07-28 00:40:19.265 [ERROR] Running tokenizer on dataset (num_proc=16): 13%|█▎ | 126/1000 [00:01<00:07, 112.24 examples/s]",
"2025-07-28 00:40:19.482 [ERROR] Running tokenizer on dataset (num_proc=16): 25%|██▌ | 252/1000 [00:01<00:04, 174.80 examples/s]"
]
}
}