修复导出的模型
用下面的脚本检查 tokenizer_config.json:如果其中的 extra_special_tokens 字段是 list,就删除该字段,然后把配置写回文件。
作者:lh
python - <<'PY'
# Repair tokenizer_config.json in the current working directory: drop the
# "extra_special_tokens" field when its value is a list, then write the
# config back to the same file.
import json

path = "tokenizer_config.json"

with open(path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

# Only a list-typed value is removed; a missing key or any other value type
# is left untouched.
if isinstance(cfg.get("extra_special_tokens"), list):
    print("extra_special_tokens 是 list,删除该字段")
    cfg.pop("extra_special_tokens", None)

# The file is rewritten unconditionally, so it is re-serialized with
# indent=2 and unescaped non-ASCII text even when nothing was removed.
with open(path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, ensure_ascii=False, indent=2)

print("修复完成")
PY
测试 tokenizer 是否能正常加载
python - <<'PY'
# Smoke test: load the tokenizer from the model directory. The final message
# is printed only if from_pretrained returns without raising.
from transformers import AutoTokenizer

# Placeholder path -- replace it with the directory of the exported model.
model_dir = "/你的模型目录"
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
print("tokenizer ok")
PY