- Add OpenClaw launchd plist (gateway on port 8080) - Update Llama-3.3-70B Modelfile: fix FROM path, add tool-calling TEMPLATE, set num_ctx 32768 (fits 70B in 64GB with safe headroom) - Update Codestral-22B and Qwen3-32B Modelfiles - Add Modelfiles for all models in ~/Models/LLM and ~/Models/MLX: EVA-LLaMA-3.33-70B, Midnight-Miqu-70B, QwQ-32B, Qwen3.5-35B, Qwen3-Coder-30B, Qwen3-Coder-Next, Qwen3-VL-30B, GLM-4.6V-Flash, DeepSeek-R1-8B, gemma-3-27b, and MLX variants - Add import-local-models.sh helper script Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
from typing import List, Optional, Union
|
|
|
|
|
|
from transformers.models.llama import LlamaTokenizerFast
|
|
|
|
|
|
class DeepseekTokenizerFast(LlamaTokenizerFast):
    """Llama fast tokenizer whose id-to-token lookups never yield ``None``.

    Both overrides below substitute an empty string whenever the backend
    tokenizer's ``id_to_token`` returns ``None`` for an id.
    """

    def convert_ids_to_tokens(
        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
    ) -> Union[str, List[str]]:
        """
        Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
        added tokens.

        Args:
            ids (`int` or `List[int]`):
                The token id (or token ids) to convert to tokens.
            skip_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not to remove special tokens in the decoding. Only applied when `ids` is a sequence; a
                single `int` is always converted.

        Returns:
            `str` or `List[str]`: The decoded token(s). An id for which the backend returns `None` is rendered as
            an empty string.
        """
        if isinstance(ids, int):
            return self._convert_id_to_token(ids)

        # Read the inherited `all_special_ids` once, and only when filtering is
        # requested, so the loop does a cheap set lookup per id instead of a
        # fresh attribute access plus membership scan on every iteration.
        # NOTE(review): assumes the special ids are hashable ints — confirm
        # against the base tokenizer.
        special_ids = frozenset(self.all_special_ids) if skip_special_tokens else frozenset()

        tokens = []
        for index in ids:
            # Elements may be int-like rather than plain ints; normalise first.
            index = int(index)
            if index in special_ids:
                continue
            # Share the single-id path so the None -> "" fallback lives in one place.
            tokens.append(self._convert_id_to_token(index))
        return tokens

    def _convert_id_to_token(self, index: int) -> Optional[str]:
        """Return the token string for `index`, or `""` when the backend returns `None`."""
        token = self._tokenizer.id_to_token(int(index))
        return token if token is not None else ""
|