Mind Lab Toolkit (MinT)
CustomizeDeployment

Export to HuggingFace

这个 recipe 展示完整流程:在 MinT 上训练一个 LoRA model,合并 weights,把结果发布到 HuggingFace Hub 给社区使用。

Use Case

  • Model 共享:把训好的 model 发布出去,其它用户可以下载使用。
  • 可复现:和研究 / 博客一起提供 weights。
  • 社区贡献:把领域特定的微调成果共享给开源社区。
  • 版本管理:在 Hub 上用 tag 维护一个 model 的多个版本。

In Practice

import asyncio
import torch
from pathlib import Path
from huggingface_hub import HfApi, HfFolder
import mint
from mint import types
from transformers import AutoModelForCausalLM, AutoTokenizer

async def train_and_publish_to_hub():
    service_client = mint.ServiceClient()
    
    # Step 1:在 MinT 上训练
    print("=== Training on MinT ===")
    
    training_client = await service_client.create_lora_training_client_async(
        base_model="Qwen/Qwen3-0.6B",
        rank=16,
    )
    tokenizer = training_client.get_tokenizer()
    adam_params = types.AdamParams(learning_rate=5e-5)
    
    # 用 instruction tuning 样本训练
    training_examples = [
        "Write a haiku about spring:\nGreen leaves emerge soft\nWarmth returns to sleeping earth\nLife renews again",
        "Explain quantum computing:\nQuantum computers process information using qubits in superposition.",
    ]
    
    for i, example in enumerate(training_examples):
        tokens = tokenizer.encode(example)
        model_input = types.ModelInput.from_ints(tokens[:-1])
        target_tokens = tokens[1:]
        weights = [1.0] * len(target_tokens)
        
        datum = types.Datum(
            model_input=model_input,
            loss_fn_inputs={"target_tokens": target_tokens, "weights": weights},
        )
        
        result = await training_client.forward_backward_async([datum], loss_fn="cross_entropy")
        await result.result_async()
        
        optim_future = training_client.optim_step_async(adam_params)
        await optim_future.result_async()
        print(f"  Example {i+1}: trained")
    
    # Step 2:保存 checkpoint
    checkpoint = await training_client.save_weights_for_sampler_async(
        name="poetry-assistant-v1"
    )
    checkpoint = await checkpoint.result_async()
    print("Checkpoint saved")
    
    # Step 3:下载并合并 weights
    print("\n=== Downloading and Merging Weights ===")
    
    base_model_id = "Qwen/Qwen3-0.6B"
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    
    # 下载 LoRA weights(用 MinT API)
    # lora_weights = checkpoint.download_weights(...)
    # merged_model = merge_lora_weights(base_model, lora_weights)
    
    # demo 里直接用 base model
    merged_model = base_model
    
    # Step 4:创建带元数据的 model 目录
    print("\n=== Preparing for Hub Upload ===")
    
    model_dir = Path("./poetry-assistant")
    model_dir.mkdir(exist_ok=True)
    
    # 保存 model 和 tokenizer
    merged_model.save_pretrained(model_dir)
    base_tokenizer.save_pretrained(model_dir)
    
    # 创建 README
    readme_content = """
    # Poetry Assistant
    
    Fine-tuned Qwen3-0.6B for poetry and creative writing.
    
    ## Training
    
    - Base model: `Qwen/Qwen3-0.6B`
    - Training method: LoRA fine-tuning on MinT
    - Rank: 16
    - Learning rate: 5e-5
    - Training examples: 2
    
    ## Usage
    
    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer
    
    model = AutoModelForCausalLM.from_pretrained("your-username/poetry-assistant")
    tokenizer = AutoTokenizer.from_pretrained("your-username/poetry-assistant")
    
    prompt = "Write a haiku about spring:"
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(inputs, max_length=50)
    print(tokenizer.decode(outputs[0]))
    ```
    
    ## License
    
    Same as base model.
    """
    
    with open(model_dir / "README.md", "w") as f:
        f.write(readme_content)
    
    # Step 5:上传到 HuggingFace Hub
    print("\n=== Uploading to HuggingFace Hub ===")
    
    # 认证(前提是已经跑过 `huggingface-cli login`)
    # 或者设置 HUGGING_FACE_HUB_TOKEN 环境变量
    hf_api = HfApi()
    
    hub_repo_id = "your-username/poetry-assistant"
    
    # 不存在就创建 repo
    try:
        hf_api.create_repo(repo_id=hub_repo_id, private=False)
        print(f"Created repo: {hub_repo_id}")
    except Exception as e:
        print(f"Repo already exists or error: {e}")
    
    # 上传文件
    hf_api.upload_folder(
        folder_path=str(model_dir),
        repo_id=hub_repo_id,
        commit_message="Initial release: LoRA fine-tuned on MinT",
    )
    
    print(f"Model uploaded to: https://huggingface.co/{hub_repo_id}")
    print("\nYour model is now public and can be loaded with:")
    print(f'  model = AutoModelForCausalLM.from_pretrained("{hub_repo_id}")')

asyncio.run(train_and_publish_to_hub())

完整源码:https://github.com/MindLab-Research/mint-quickstart/blob/main/recipes/lora_adapter.py(export 子集)

Verified Run

把微调后的 Qwen3-0.6B 发布到 HuggingFace Hub:

  • Model 大小:LoRA weights 约 50MB + 配置文件约 1MB。
  • 上传时间:在普通网络下约 30 秒。
  • 下载可用性:Model 立刻能通过 from_pretrained() 加载。
  • 社区指标:Model 出现在 HuggingFace 搜索中,会拿到其它用户的下载 / star。
  • 版本管理:用 git tag 标记发布(v1.0、v1.1 等)做版本控制。

本页目录