{
"model_name_or_path": "Qwen3-8B",
"model_size_in_billion": "8.0",
"gpu_type": "h800",
"gpu_nums": 2,
"public_datasets": [
"identity",
"alpaca_zh_demo",
"alpaca_en_demo"
],
"private_datasets": [
"QA_from_CoVLA_zh"
],
"user_id": "user123",
"user_key": "userdddd",
"cutoff_len": 2048,
"per_device_eval_batch_size": 2,
"booster": "fa2",
"max_samples": 100000,
"max_new_tokens": 512,
"quantization_bit": "None",
"quantization_method": "bnb",
"rope_scaling": "ilsder"
}

curl --location --request POST '/estimator/estimated_inference_time' \
--header 'Content-Type: application/json' \
--data-raw '{
"model_name_or_path": "Qwen3-8B",
"model_size_in_billion": "8.0",
"gpu_type": "h800",
"gpu_nums": 2,
"public_datasets": [
"identity",
"alpaca_zh_demo",
"alpaca_en_demo"
],
"private_datasets": [
"QA_from_CoVLA_zh"
],
"user_id": "user123",
"user_key": "userdddd",
"cutoff_len": 2048,
"per_device_eval_batch_size": 2,
"booster": "fa2",
"max_samples": 100000,
"max_new_tokens": 512,
"quantization_bit": "None",
"quantization_method": "bnb",
"rope_scaling": "ilsder"
}'

{
"predict_total_steps": 250,
"predict_step_time_sec": 0.432,
"predict_total_time_sec": 108,
"predict_total_time_human": "0:01:48",
"predict_total_time_sec_max": 138,
"predict_total_time_human_max": "0:02:18",
"predict_packing_enabled": false,
"predict_packing_speedup_factor": 0.93
}