Customize Deployment
Export to HuggingFace
这个 recipe 展示完整流程:在 MinT 上训练一个 LoRA model,合并 weights,把结果发布到 HuggingFace Hub 给社区使用。
Use Case
- Model 共享:把训好的 model 发布出去,其它用户可以下载使用。
- 可复现:和研究 / 博客一起提供 weights。
- 社区贡献:把领域特定的微调成果共享给开源社区。
- 版本管理:在 Hub 上用 tag 维护一个 model 的多个版本。
In Practice
import asyncio
import torch
from pathlib import Path
from huggingface_hub import HfApi, HfFolder
import mint
from mint import types
from transformers import AutoModelForCausalLM, AutoTokenizer
async def train_and_publish_to_hub(
    base_model_id: str = "Qwen/Qwen3-0.6B",
    hub_repo_id: str = "your-username/poetry-assistant",
    checkpoint_name: str = "poetry-assistant-v1",
) -> None:
    """Train a LoRA model on MinT, merge the weights, and publish to the HuggingFace Hub.

    Args:
        base_model_id: HF model id of the base model to fine-tune.
        hub_repo_id: Target repo on the Hub, "<username>/<repo-name>".
        checkpoint_name: Name used when saving the MinT sampler checkpoint.
    """
    service_client = mint.ServiceClient()

    # Step 1: train on MinT.
    print("=== Training on MinT ===")
    training_client = await service_client.create_lora_training_client_async(
        base_model=base_model_id,
        rank=16,
    )
    tokenizer = training_client.get_tokenizer()
    adam_params = types.AdamParams(learning_rate=5e-5)

    # Train on instruction-tuning examples.
    training_examples = [
        "Write a haiku about spring:\nGreen leaves emerge soft\nWarmth returns to sleeping earth\nLife renews again",
        "Explain quantum computing:\nQuantum computers process information using qubits in superposition.",
    ]
    for i, example in enumerate(training_examples):
        tokens = tokenizer.encode(example)
        # Next-token prediction: inputs are tokens[:-1], targets tokens[1:].
        model_input = types.ModelInput.from_ints(tokens[:-1])
        target_tokens = tokens[1:]
        weights = [1.0] * len(target_tokens)
        datum = types.Datum(
            model_input=model_input,
            loss_fn_inputs={"target_tokens": target_tokens, "weights": weights},
        )
        result = await training_client.forward_backward_async([datum], loss_fn="cross_entropy")
        await result.result_async()
        optim_future = training_client.optim_step_async(adam_params)
        await optim_future.result_async()
        print(f" Example {i+1}: trained")

    # Step 2: save a checkpoint.
    checkpoint = await training_client.save_weights_for_sampler_async(
        name=checkpoint_name
    )
    checkpoint = await checkpoint.result_async()
    print("Checkpoint saved")

    # Step 3: download and merge the weights.
    print("\n=== Downloading and Merging Weights ===")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    # Download the LoRA weights via the MinT API:
    # lora_weights = checkpoint.download_weights(...)
    # merged_model = merge_lora_weights(base_model, lora_weights)
    # This demo publishes the base model as-is.
    merged_model = base_model

    # Step 4: create a model directory with metadata.
    print("\n=== Preparing for Hub Upload ===")
    model_dir = Path("./poetry-assistant")
    model_dir.mkdir(parents=True, exist_ok=True)

    # Save the model and tokenizer.
    merged_model.save_pretrained(model_dir)
    base_tokenizer.save_pretrained(model_dir)

    # Model card; content starts at column 0 so the Markdown renders cleanly.
    readme_content = f"""
# Poetry Assistant
Fine-tuned {base_model_id} for poetry and creative writing.
## Training
- Base model: `{base_model_id}`
- Training method: LoRA fine-tuning on MinT
- Rank: 16
- Learning rate: 5e-5
- Training examples: 2
## Usage
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("{hub_repo_id}")
tokenizer = AutoTokenizer.from_pretrained("{hub_repo_id}")
prompt = "Write a haiku about spring:"
inputs = tokenizer.encode(prompt, return_tensors="pt")
outputs = model.generate(inputs, max_length=50)
print(tokenizer.decode(outputs[0]))
```
## License
Same as base model.
"""
    # Explicit UTF-8 so the card round-trips regardless of the platform default.
    (model_dir / "README.md").write_text(readme_content, encoding="utf-8")

    # Step 5: upload to the HuggingFace Hub.
    print("\n=== Uploading to HuggingFace Hub ===")
    # Authentication: run `huggingface-cli login` first, or set the
    # HUGGING_FACE_HUB_TOKEN environment variable.
    hf_api = HfApi()
    # exist_ok=True makes repo creation idempotent; the previous broad
    # try/except also hid genuine failures (auth errors, bad repo id).
    hf_api.create_repo(repo_id=hub_repo_id, private=False, exist_ok=True)

    # Upload the whole model directory as one commit.
    hf_api.upload_folder(
        folder_path=str(model_dir),
        repo_id=hub_repo_id,
        commit_message="Initial release: LoRA fine-tuned on MinT",
    )
    print(f"Model uploaded to: https://huggingface.co/{hub_repo_id}")
    print("\nYour model is now public and can be loaded with:")
    print(f' model = AutoModelForCausalLM.from_pretrained("{hub_repo_id}")')
asyncio.run(train_and_publish_to_hub())

完整源码:https://github.com/MindLab-Research/mint-quickstart/blob/main/recipes/lora_adapter.py(export 子集)
Verified Run
把微调后的 Qwen3-0.6B 发布到 HuggingFace Hub:
- Model 大小:LoRA weights 约 50MB + 配置文件约 1MB。
- 上传时间:在普通网络下约 30 秒。
- 下载可用性:Model 立刻能通过 from_pretrained() 加载。
- 社区指标:Model 出现在 HuggingFace 搜索中,会拿到其它用户的下载 / star。
- 版本管理:用 git tag 标记发布(v1.0、v1.1 等)做版本控制。