vllm.reasoning.mistral_reasoning_parser

logger module-attribute

logger = init_logger(__name__)

MistralReasoningParser

Bases: BaseThinkingReasoningParser

Reasoning parser for Mistral models.

Mistral models use [THINK]...[/THINK] tokens to delimit reasoning text. This parser extracts the reasoning content from the model output.

A valid reasoning trace should always start with a [THINK] token and end with a [/THINK] token.

If no [THINK] token is generated, this parser returns only content.
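
For example, given an output of the form "[THINK]...[/THINK]answer", the parser returns the text between the think tokens as reasoning content and the trailing text as content. A minimal sketch of that split using plain string operations (the literal token strings are assumptions for illustration; the parser reads the real values from mistral_common's SpecialTokens):

START, END = "[THINK]", "[/THINK]"

model_output = "[THINK]The user wants a haiku about rain.[/THINK]Rain taps the glass..."

_, _, after = model_output.partition(START)
reasoning, _, content = after.partition(END)

print(reasoning)  # The user wants a haiku about rain.
print(content)    # Rain taps the glass...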

Source code in vllm/reasoning/mistral_reasoning_parser.py
class MistralReasoningParser(BaseThinkingReasoningParser):
    """
    Reasoning parser for Mistral models.

    Mistral models use `[THINK]`...`[/THINK]` tokens to delimit reasoning
    text. This parser extracts the reasoning content from the model output.

    A valid reasoning trace should always start with a `[THINK]` token and end with
    a `[/THINK]` token.

    If no `[THINK]` token is generated, this parser returns only content.
    """

    def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
        if not isinstance(tokenizer, MistralTokenizer):
            raise ValueError("The tokenizer must be an instance of MistralTokenizer.")

        ReasoningParser.__init__(self, tokenizer, *args, **kwargs)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
                "constructor during construction."
            )

        self.start_token_id = tokenizer.tokenizer.get_control_token(self.start_token)
        self.end_token_id = tokenizer.tokenizer.get_control_token(self.end_token)

        if self.start_token_id is None or self.end_token_id is None:
            raise RuntimeError(
                "Mistral reasoning parser could not locate think start/end "
                "tokens in the tokenizer!"
            )

    @cached_property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.begin_think

    @cached_property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.end_think

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        has_eot_token = False

        # Scan backwards: reasoning has ended only if an EOT token appears
        # after the most recent BOT token.
        for token_id in input_ids[::-1]:
            if token_id == self.start_token_id:
                return has_eot_token
            elif token_id == self.end_token_id:
                has_eot_token = True
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Extract the content token ids, with the reasoning span removed.
        """
        has_bot_token = False
        has_eot_token = False
        bot_token_index = -1
        eot_token_index = -1
        # One pass instead of multiple lookups
        for i, token_id in enumerate(input_ids):
            # Ignore any BOT token after the first; duplicates should not
            # occur for a well-prompted, well-trained model
            if token_id == self.start_token_id and not has_bot_token:
                has_bot_token = True
                bot_token_index = i
            elif token_id == self.end_token_id:
                has_eot_token = True
                eot_token_index = i
                break

        # 1. Only BOT has been emitted
        if has_bot_token and not has_eot_token:
            # Should be [] if the model is well prompted and trained.
            return input_ids[:bot_token_index]
        # 2. Neither BOT nor EOT has been emitted
        elif not has_bot_token and not has_eot_token:
            return input_ids
        # 3. Both BOT and EOT have been emitted.
        elif has_bot_token and has_eot_token:
            return input_ids[:bot_token_index] + input_ids[eot_token_index + 1 :]
        # 4. Only EOT has been emitted => this should not occur for a
        #    well-prompted, well-trained model.
        else:
            return input_ids[:eot_token_index] + input_ids[eot_token_index + 1 :]

    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """
        Extract reasoning content from the model output.
        """
        if not model_output:
            return (None, "")

        # Check if the start token is present in the model output, remove it
        # if it is present.
        prev_bot_token, bot_token, post_bot_token = model_output.partition(
            self.start_token
        )

        has_bot_token = bool(bot_token)
        # A valid EOT token must follow the BOT token
        has_valid_eot_token = has_bot_token and self.end_token in post_bot_token

        # 1. A BOT token followed by an EOT token
        if has_bot_token and has_valid_eot_token:
            prev_eot_token, _, post_eot_token = post_bot_token.partition(self.end_token)
            # If the model is well prompted and trained, prev_bot_token should be ""
            content = prev_bot_token + post_eot_token
            return prev_eot_token, content if content else None
        # 2. Only a BOT token
        elif has_bot_token:
            # If the model is well prompted and trained, prev_bot_token should be ""
            return post_bot_token, prev_bot_token if prev_bot_token else None
        # 3. An EOT token emitted without a BOT token, or neither token emitted
        else:
            has_non_valid_eot_token = self.end_token in prev_bot_token
            # 3.a An EOT token emitted without a BOT token
            # If the model is well prompted and trained, `has_non_valid_eot_token`
            # should be `False` and the parser returns all tokens as 'content'
            if has_non_valid_eot_token:
                prev_eot_token, _, post_eot_token = prev_bot_token.partition(
                    self.end_token
                )
                return None, prev_eot_token + post_eot_token
            # 3.b Neither BOT nor EOT has been emitted
            else:
                return None, prev_bot_token

end_token cached property

end_token: str

The token that ends reasoning content.

end_token_id instance-attribute

end_token_id = get_control_token(end_token)

start_token cached property

start_token: str

The token that starts reasoning content.

start_token_id instance-attribute

start_token_id = get_control_token(start_token)
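
The token strings are resolved from mistral_common rather than hard-coded here. A quick way to inspect them (the printed values are assumptions based on current mistral_common releases; verify against your installed version):

from mistral_common.tokens.tokenizers.base import SpecialTokens

print(SpecialTokens.begin_think.value)  # expected: [THINK]
print(SpecialTokens.end_think.value)    # expected: [/THINK]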

__init__

__init__(tokenizer: MistralTokenizer, *args, **kwargs)
Source code in vllm/reasoning/mistral_reasoning_parser.py
def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
    if not isinstance(tokenizer, MistralTokenizer):
        raise ValueError("The tokenizer must be an instance of MistralTokenizer.")

    ReasoningParser.__init__(self, tokenizer, *args, **kwargs)

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ReasoningParser "
            "constructor during construction."
        )

    self.start_token_id = tokenizer.tokenizer.get_control_token(self.start_token)
    self.end_token_id = tokenizer.tokenizer.get_control_token(self.end_token)

    if self.start_token_id is None or self.end_token_id is None:
        raise RuntimeError(
            "Mistral reasoning parser could not locate think start/end "
            "tokens in the tokenizer!"
        )
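
A hypothetical construction sketch; it assumes MistralTokenizer.from_pretrained is available in your vLLM version and that the chosen model ships the think control tokens (the model name is a placeholder):

from vllm.transformers_utils.tokenizers import MistralTokenizer
from vllm.reasoning.mistral_reasoning_parser import MistralReasoningParser

tokenizer = MistralTokenizer.from_pretrained("mistralai/Magistral-Small-2506")
parser = MistralReasoningParser(tokenizer)

# The resolved control-token ids used by the token-level methods below.
print(parser.start_token_id, parser.end_token_id)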

extract_content_ids

extract_content_ids(input_ids: list[int]) -> list[int]

Extract the content token ids, with the reasoning span removed.

Source code in vllm/reasoning/mistral_reasoning_parser.py
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
    """
    Extract the content token ids, with the reasoning span removed.
    """
    has_bot_token = False
    has_eot_token = False
    bot_token_index = -1
    eot_token_index = -1
    # One pass instead of multiple lookups
    for i, token_id in enumerate(input_ids):
        # Ignore any BOT token after the first; duplicates should not
        # occur for a well-prompted, well-trained model
        if token_id == self.start_token_id and not has_bot_token:
            has_bot_token = True
            bot_token_index = i
        elif token_id == self.end_token_id:
            has_eot_token = True
            eot_token_index = i
            break

    # 1. Only BOT has been emitted
    if has_bot_token and not has_eot_token:
        # Should be [] if the model is well prompted and trained.
        return input_ids[:bot_token_index]
    # 2. Neither BOT nor EOT has been emitted
    elif not has_bot_token and not has_eot_token:
        return input_ids
    # 3. Both BOT and EOT have been emitted.
    elif has_bot_token and has_eot_token:
        return input_ids[:bot_token_index] + input_ids[eot_token_index + 1 :]
    # 4. Only EOT has been emitted => this should not occur for a
    #    well-prompted, well-trained model.
    else:
        return input_ids[:eot_token_index] + input_ids[eot_token_index + 1 :]
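
A worked example for case 3 with hypothetical token ids (BOT=10, EOT=11; the real ids come from the tokenizer's control tokens):

BOT_ID, EOT_ID = 10, 11  # hypothetical ids for illustration only

input_ids = [10, 101, 102, 11, 201, 202]  # [THINK] r1 r2 [/THINK] c1 c2

# Mirrors case 3 above: drop the [THINK]...[/THINK] span, inclusive.
bot = input_ids.index(BOT_ID)
eot = input_ids.index(EOT_ID)
content_ids = input_ids[:bot] + input_ids[eot + 1:]
print(content_ids)  # [201, 202]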

extract_reasoning

extract_reasoning(
    model_output: str,
    request: ChatCompletionRequest | ResponsesRequest,
) -> tuple[str | None, str | None]

Extract reasoning content from the model output.

Source code in vllm/reasoning/mistral_reasoning_parser.py
def extract_reasoning(
    self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]:
    """
    Extract reasoning content from the model output.
    """
    if not model_output:
        return (None, "")

    # Check if the start token is present in the model output, remove it
    # if it is present.
    prev_bot_token, bot_token, post_bot_token = model_output.partition(
        self.start_token
    )

    has_bot_token = bool(bot_token)
    # A valid EOT token must follow the BOT token
    has_valid_eot_token = has_bot_token and self.end_token in post_bot_token

    # 1. A BOT token followed by an EOT token
    if has_bot_token and has_valid_eot_token:
        prev_eot_token, _, post_eot_token = post_bot_token.partition(self.end_token)
        # If the model is well prompted and trained, prev_bot_token should be ""
        content = prev_bot_token + post_eot_token
        return prev_eot_token, content if content else None
    # 2. Only a BOT token
    elif has_bot_token:
        # If the model is well prompted and trained, prev_bot_token should be ""
        return post_bot_token, prev_bot_token if prev_bot_token else None
    # 3. An EOT token emitted without a BOT token, or neither token emitted
    else:
        has_non_valid_eot_token = self.end_token in prev_bot_token
        # 3.a An EOT token emitted without a BOT token
        # If the model is well prompted and trained, `has_non_valid_eot_token`
        # should be `False` and the parser returns all tokens as 'content'
        if has_non_valid_eot_token:
            prev_eot_token, _, post_eot_token = prev_bot_token.partition(
                self.end_token
            )
            return None, prev_eot_token + post_eot_token
        # 3.b Neither BOT nor EOT has been emitted
        else:
            return None, prev_bot_token
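
The four branches can be exercised with a self-contained string mirror of this logic (the token strings are stand-ins; the parser uses the SpecialTokens values):

START, END = "[THINK]", "[/THINK]"

def split_reasoning(text: str) -> tuple[str | None, str | None]:
    # Mirrors extract_reasoning on plain strings.
    before, bot, after = text.partition(START)
    if bot and END in after:              # case 1: BOT then EOT
        reasoning, _, post = after.partition(END)
        return reasoning, (before + post) or None
    if bot:                               # case 2: BOT only
        return after, before or None
    if END in before:                     # case 3.a: stray EOT is dropped
        pre, _, post = before.partition(END)
        return None, pre + post
    return None, before                   # case 3.b: no think tokens

assert split_reasoning("[THINK]plan[/THINK]answer") == ("plan", "answer")
assert split_reasoning("[THINK]still thinking") == ("still thinking", None)
assert split_reasoning("plan[/THINK]answer") == (None, "plananswer")
assert split_reasoning("answer only") == (None, "answer only")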

is_reasoning_end

is_reasoning_end(input_ids: list[int]) -> bool
Source code in vllm/reasoning/mistral_reasoning_parser.py
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    has_eot_token = False

    # Scan backwards: reasoning has ended only if an EOT token appears
    # after the most recent BOT token.
    for token_id in input_ids[::-1]:
        if token_id == self.start_token_id:
            return has_eot_token
        elif token_id == self.end_token_id:
            has_eot_token = True
    return False
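
A worked example of the reverse scan with hypothetical ids (BOT=10, EOT=11):

BOT_ID, EOT_ID = 10, 11  # hypothetical ids for illustration only

def reasoning_ended(ids: list[int]) -> bool:
    # Mirror of is_reasoning_end: walk backwards and decide once the most
    # recent BOT token is reached.
    seen_eot = False
    for token_id in reversed(ids):
        if token_id == BOT_ID:
            return seen_eot
        if token_id == EOT_ID:
            seen_eot = True
    return False

print(reasoning_ended([10, 1, 2, 11, 3]))  # True: EOT follows BOT
print(reasoning_ended([10, 1, 2]))         # False: still inside reasoning
print(reasoning_ended([1, 2, 3]))          # False: no BOT seen at all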