Skip to content

vllm.model_executor.layers.fused_moe.router.routing_simulator_router

Classes:

DistributionBasedRouting

Bases: RoutingStrategy

Distribution-based random routing strategy with configurable distributions.

This routing strategy randomly selects experts for each token based on different probability distributions. Currently supports uniform and normal distributions for testing different routing patterns.

Methods:

  • __init__

    Initialize distribution-based routing.

  • get_distribution_info

    Get information about the current distribution configuration.

  • route_tokens

    Randomly select experts for each token using the specified distribution.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
class DistributionBasedRouting(RoutingStrategy):
    """
    Distribution-based random routing strategy with configurable distributions.

    This routing strategy randomly selects experts for each token based on
    different probability distributions. Currently supports uniform and normal
    distributions for testing different routing patterns.

    Expert IDs are drawn independently for each of the ``top_k`` slots, so a
    token may be assigned the same expert more than once.
    """

    def __init__(self, distribution: str = "uniform", **distribution_params: Any):
        """
        Initialize distribution-based routing.

        Args:
            distribution: Type of distribution to use for sampling
                (case-insensitive)
                - "uniform": Uniform distribution (default)
                - "normal": Normal/Gaussian distribution
            **distribution_params: Parameters specific to the
                chosen distribution
                For "uniform": No additional parameters needed
                For "normal": mean (default: 0.0), std (default: 1.0)

        Raises:
            ValueError: If the distribution is not supported, or if a
                negative ``std`` is given for the normal distribution.
        """
        self.distribution = distribution.lower()
        self.distribution_params = distribution_params

        # Validate distribution and parameters
        self._validate_distribution_params()

    def _validate_distribution_params(self):
        """
        Validate distribution type and parameters.

        Fills in the default ``mean``/``std`` for the normal distribution
        when they were not supplied.

        Raises:
            ValueError: If the distribution is not supported, or if ``std``
                is negative for the normal distribution.
        """
        valid_distributions = ["uniform", "normal"]

        if self.distribution not in valid_distributions:
            raise ValueError(
                f"Unsupported distribution: {self.distribution}. "
                f"Supported distributions: {valid_distributions}"
            )

        # Set default parameters if not provided
        if self.distribution == "normal":
            self.distribution_params.setdefault("mean", 0.0)
            self.distribution_params.setdefault("std", 1.0)

            # Fail at construction time instead of deep inside torch.normal
            # on the first routing call.
            std = self.distribution_params["std"]
            if std < 0:
                raise ValueError(
                    f"std must be non-negative for the normal distribution, got {std}"
                )

    def route_tokens(
        self,
        hidden_states: torch.Tensor,
        router_logits: torch.Tensor,
        top_k: int,
        indices_type: torch.dtype | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Randomly select experts for each token using the specified distribution.

        Only the shapes and device of the inputs are used; their values are
        ignored.

        Args:
            hidden_states: Input hidden states [num_tokens, hidden_size]
            router_logits: Router logits [num_tokens, num_experts]
            top_k: Number of experts to select per token
            indices_type: Data type for expert indices
                (``torch.long`` when None)

        Returns:
            tuple of (topk_weights, topk_ids) where:
            - topk_weights: Weights based on distribution sampling
            - topk_ids: Expert indices sampled from the distribution
        """
        num_tokens = hidden_states.shape[0]
        num_experts = router_logits.shape[-1]

        if indices_type is None:
            indices_type = torch.long

        # Generate expert IDs based on the specified distribution
        topk_ids = self._sample_expert_ids(
            num_tokens, num_experts, top_k, hidden_states.device, indices_type
        )

        # Generate weights based on the distribution
        topk_weights = self._generate_weights(num_tokens, top_k, hidden_states.device)

        return topk_weights, topk_ids

    def _sample_expert_ids(
        self,
        num_tokens: int,
        num_experts: int,
        top_k: int,
        device: torch.device,
        indices_type: torch.dtype,
    ) -> torch.Tensor:
        """
        Sample expert IDs based on the specified distribution.

        Returns:
            Tensor of shape (num_tokens, top_k) with dtype ``indices_type``
            and values in [0, num_experts).

        Raises:
            ValueError: If the configured distribution is not supported.
        """

        if self.distribution == "uniform":
            # Uniform random sampling
            return torch.randint(
                low=0,
                high=num_experts,
                size=(num_tokens, top_k),
                dtype=indices_type,
                device=device,
            )

        elif self.distribution == "normal":
            # For normal distribution, sample continuous values and map to
            # expert IDs
            continuous_samples = self._sample_continuous_distribution(
                num_tokens, top_k, device
            )

            # Map continuous samples to expert indices
            # Normalize to [0, 1] range and scale to [0, num_experts)
            normalized_samples = self._normalize_samples(continuous_samples)
            expert_ids = (normalized_samples * num_experts).long()
            # A normalized value of exactly 1.0 would index one past the end.
            expert_ids = torch.clamp(expert_ids, 0, num_experts - 1)

            return expert_ids.to(dtype=indices_type)

        else:
            raise ValueError(f"Unsupported distribution: {self.distribution}")

    def _sample_continuous_distribution(
        self, num_tokens: int, top_k: int, device: torch.device
    ) -> torch.Tensor:
        """
        Sample a (num_tokens, top_k) tensor from the continuous distribution.

        Raises:
            ValueError: If the configured distribution is not "normal".
        """
        shape = (num_tokens, top_k)

        if self.distribution == "normal":
            mean = self.distribution_params["mean"]
            std = self.distribution_params["std"]
            return torch.normal(mean, std, size=shape, device=device)

        else:
            raise ValueError(
                f"Unsupported continuous distribution: {self.distribution}"
            )

    def _normalize_samples(self, samples: torch.Tensor) -> torch.Tensor:
        """
        Normalize samples to [0, 1] range.

        Raises:
            ValueError: If the configured distribution is not "normal".
        """
        if self.distribution == "normal":
            # Use sigmoid to map normal distribution to [0, 1]
            return torch.sigmoid(samples)

        else:
            raise ValueError(
                f"Unsupported distribution for normalization: {self.distribution}"
            )

    def _generate_weights(
        self, num_tokens: int, top_k: int, device: torch.device
    ) -> torch.Tensor:
        """
        Generate weights based on the distribution.

        Returns:
            Tensor of shape (num_tokens, top_k). For "uniform" every weight
            is 1.0 (float32). For "normal" the weights are absolute values of
            fresh samples, normalized so each token's weights sum to 1.

        Raises:
            ValueError: If the configured distribution is not supported.
        """
        if self.distribution == "uniform":
            # All-ones weights for uniform distribution
            return torch.ones(
                (num_tokens, top_k),
                dtype=torch.float32,
                device=device,
            )

        elif self.distribution == "normal":
            # For normal distribution, generate weights from the same
            # distribution
            continuous_weights = self._sample_continuous_distribution(
                num_tokens, top_k, device
            )
            # Normalize to positive values and sum to 1
            weights = torch.abs(continuous_weights)
            totals = weights.sum(dim=-1, keepdim=True)
            # A row can sum to exactly 0 (e.g. mean=0, std=0); dividing would
            # yield NaN weights, so such rows fall back to equal weights.
            # torch.where keeps this free of data-dependent Python branching.
            equal_share = torch.full_like(weights, 1.0 / max(top_k, 1))
            return torch.where(totals > 0, weights / totals, equal_share)

        else:
            raise ValueError(
                f"Unsupported distribution for weight generation: {self.distribution}"
            )

    def get_distribution_info(self) -> dict:
        """
        Get information about the current distribution configuration.

        Returns:
            Dict with the distribution name under "distribution" and a
            shallow copy of its parameters under "parameters".
        """
        return {
            "distribution": self.distribution,
            "parameters": self.distribution_params.copy(),
        }

__init__(distribution='uniform', **distribution_params)

Initialize distribution-based routing.

Parameters:

  • distribution

    (str, default: 'uniform' ) –

    Type of distribution to use for sampling: "uniform" (uniform distribution, the default) or "normal" (normal/Gaussian distribution).

  • **distribution_params

    (Any, default: {} ) –

    Parameters specific to the chosen distribution. For "uniform": no additional parameters needed. For "normal": mean (default: 0.0), std (default: 1.0).

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def __init__(self, distribution: str = "uniform", **distribution_params: Any):
    """
    Set up a distribution-based routing strategy.

    Args:
        distribution: Name of the sampling distribution; it is lower-cased
            before being stored
            - "uniform": Uniform distribution (default)
            - "normal": Normal/Gaussian distribution
        **distribution_params: Keyword parameters for the chosen
            distribution
            For "uniform": No additional parameters needed
            For "normal": mean (default: 0.0), std (default: 1.0)
    """
    normalized_name = distribution.lower()

    self.distribution = normalized_name
    # The keyword dict is stored as-is; validation may add defaults to it.
    self.distribution_params = distribution_params

    # Reject unknown distributions and fill in missing defaults.
    self._validate_distribution_params()

_generate_weights(num_tokens, top_k, device)

Generate weights based on the distribution.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _generate_weights(
    self, num_tokens: int, top_k: int, device: torch.device
) -> torch.Tensor:
    """
    Generate weights based on the distribution.

    Args:
        num_tokens: Number of tokens (rows of the result)
        top_k: Number of experts per token (columns of the result)
        device: Device on which the weights are created

    Returns:
        Tensor of shape (num_tokens, top_k). For "uniform" every weight is
        1.0 (float32). For "normal" the weights are absolute values of fresh
        samples, normalized so each token's weights sum to 1.

    Raises:
        ValueError: If the configured distribution is not supported.
    """
    if self.distribution == "uniform":
        # All-ones weights for uniform distribution
        return torch.ones(
            (num_tokens, top_k),
            dtype=torch.float32,
            device=device,
        )

    elif self.distribution == "normal":
        # For normal distribution, generate weights from the same
        # distribution
        continuous_weights = self._sample_continuous_distribution(
            num_tokens, top_k, device
        )
        # Normalize to positive values and sum to 1
        weights = torch.abs(continuous_weights)
        totals = weights.sum(dim=-1, keepdim=True)
        # A row can sum to exactly 0 (e.g. mean=0, std=0); dividing would
        # yield NaN weights, so such rows fall back to equal weights.
        # torch.where keeps this free of data-dependent Python branching.
        equal_share = torch.full_like(weights, 1.0 / max(top_k, 1))
        return torch.where(totals > 0, weights / totals, equal_share)

    else:
        raise ValueError(
            f"Unsupported distribution for weight generation: {self.distribution}"
        )

_normalize_samples(samples)

Normalize samples to [0, 1] range.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _normalize_samples(self, samples: torch.Tensor) -> torch.Tensor:
    """
    Map continuous samples into the [0, 1] range.

    Args:
        samples: Tensor of continuous samples to normalize

    Returns:
        Tensor with the same shape as ``samples``, squashed by a sigmoid.

    Raises:
        ValueError: If the configured distribution is not "normal".
    """
    # Guard first: only the normal distribution has a normalization rule.
    if self.distribution != "normal":
        raise ValueError(
            f"Unsupported distribution for normalization: {self.distribution}"
        )

    # Sigmoid squashes the unbounded normal samples into [0, 1].
    squashed = torch.sigmoid(samples)
    return squashed

_sample_continuous_distribution(num_tokens, top_k, device)

Sample from continuous distributions.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _sample_continuous_distribution(
    self, num_tokens: int, top_k: int, device: torch.device
) -> torch.Tensor:
    """
    Draw a (num_tokens, top_k) tensor from the continuous distribution.

    Args:
        num_tokens: Number of rows to sample
        top_k: Number of columns to sample
        device: Device on which the samples are created

    Raises:
        ValueError: If the configured distribution is not "normal".
    """
    # Only the normal distribution is continuous here; reject the rest early.
    if self.distribution != "normal":
        raise ValueError(
            f"Unsupported continuous distribution: {self.distribution}"
        )

    params = self.distribution_params
    return torch.normal(
        params["mean"],
        params["std"],
        size=(num_tokens, top_k),
        device=device,
    )

_sample_expert_ids(num_tokens, num_experts, top_k, device, indices_type)

Sample expert IDs based on the specified distribution.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _sample_expert_ids(
    self,
    num_tokens: int,
    num_experts: int,
    top_k: int,
    device: torch.device,
    indices_type: torch.dtype,
) -> torch.Tensor:
    """
    Draw expert IDs for every token from the configured distribution.

    Args:
        num_tokens: Number of tokens to route
        num_experts: Number of experts to choose from
        top_k: Number of expert slots per token
        device: Device on which the IDs are created
        indices_type: Data type of the returned IDs

    Returns:
        Tensor of shape (num_tokens, top_k) with values in [0, num_experts).

    Raises:
        ValueError: If the configured distribution is not supported.
    """
    kind = self.distribution

    if kind == "uniform":
        # Every expert is equally likely for every slot.
        return torch.randint(
            low=0,
            high=num_experts,
            size=(num_tokens, top_k),
            dtype=indices_type,
            device=device,
        )

    if kind != "normal":
        raise ValueError(f"Unsupported distribution: {self.distribution}")

    # Normal case: sample continuous values, squash them into [0, 1], then
    # scale to the expert index range.
    raw = self._sample_continuous_distribution(num_tokens, top_k, device)
    unit = self._normalize_samples(raw)
    scaled = (unit * num_experts).long()
    # A normalized value of exactly 1.0 would land one past the last expert.
    bounded = scaled.clamp(min=0, max=num_experts - 1)
    return bounded.to(dtype=indices_type)

_validate_distribution_params()

Validate distribution type and parameters.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _validate_distribution_params(self):
    """
    Validate distribution type and parameters.

    Fills in the default ``mean`` (0.0) and ``std`` (1.0) for the normal
    distribution when they were not supplied.

    Raises:
        ValueError: If the distribution is not supported, or if ``std`` is
            negative for the normal distribution.
    """
    valid_distributions = ["uniform", "normal"]

    if self.distribution not in valid_distributions:
        raise ValueError(
            f"Unsupported distribution: {self.distribution}. "
            f"Supported distributions: {valid_distributions}"
        )

    # Set default parameters if not provided
    if self.distribution == "normal":
        self.distribution_params.setdefault("mean", 0.0)
        self.distribution_params.setdefault("std", 1.0)

        # Fail here rather than deep inside torch.normal on the first
        # routing call.
        std = self.distribution_params["std"]
        if std < 0:
            raise ValueError(
                f"std must be non-negative for the normal distribution, got {std}"
            )

get_distribution_info()

Get information about the current distribution configuration.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def get_distribution_info(self) -> dict:
    """
    Describe the current distribution configuration.

    Returns:
        Dict with the distribution name under "distribution" and a shallow
        copy of its parameters under "parameters".
    """
    info = {"distribution": self.distribution}
    # Copy so callers cannot mutate this instance's parameter dict.
    info["parameters"] = self.distribution_params.copy()
    return info

route_tokens(hidden_states, router_logits, top_k, indices_type=None)

Randomly select experts for each token using the specified distribution.

Parameters:

  • hidden_states

    (Tensor) –

    Input hidden states [num_tokens, hidden_size]

  • router_logits

    (Tensor) –

    Router logits [num_tokens, num_experts]

  • top_k

    (int) –

    Number of experts to select per token

  • indices_type

    (dtype | None, default: None ) –

    Data type for expert indices

Returns:

  • tuple[Tensor, Tensor]

    tuple of (topk_weights, topk_ids) where:

    • topk_weights: Weights based on distribution sampling
    • topk_ids: Expert indices sampled from the distribution
Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def route_tokens(
    self,
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
    top_k: int,
    indices_type: torch.dtype | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Randomly select experts for each token using the specified distribution.

    Args:
        hidden_states: Input hidden states [num_tokens, hidden_size]
        router_logits: Router logits [num_tokens, num_experts]
        top_k: Number of experts to select per token
        indices_type: Data type for expert indices

    Returns:
        tuple of (topk_weights, topk_ids) where:
        - topk_weights: Weights based on distribution sampling
        - topk_ids: Expert indices sampled from the distribution
    """
    num_tokens = hidden_states.shape[0]
    num_experts = router_logits.shape[-1]

    if indices_type is None:
        indices_type = torch.long

    # Generate expert IDs based on the specified distribution
    topk_ids = self._sample_expert_ids(
        num_tokens, num_experts, top_k, hidden_states.device, indices_type
    )

    # Generate weights based on the distribution
    topk_weights = self._generate_weights(num_tokens, top_k, hidden_states.device)

    return topk_weights, topk_ids

RoutingSimulator

Token-to-Expert Routing Simulator.

This class provides a framework for testing and comparing different routing strategies for MoE models. It can simulate routing behavior and collect statistics for analysis.

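Methods:

  • get_available_strategies

    Get list of available routing strategy names.

  • register_strategy

    Register a custom routing strategy.

  • simulate_routing

    Simulate token-to-expert routing using the specified strategy.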

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
class RoutingSimulator:
    """
    Token-to-Expert Routing Simulator.

    Holds a class-level registry of named routing strategies and dispatches
    routing requests to them, so different strategies for MoE models can be
    tested and compared.
    """

    # Registry shared by the whole class: strategy name -> strategy instance.
    _routing_strategies: dict[str, RoutingStrategy] = dict(
        # Built-in strategies
        uniform_random=DistributionBasedRouting(
            distribution="uniform", mean=0.0, std=1.0
        ),
        normal_routing=DistributionBasedRouting(
            distribution="normal", mean=0.0, std=1.0
        ),
    )

    @classmethod
    def register_strategy(cls, name: str, strategy: RoutingStrategy):
        """
        Add a custom routing strategy to the registry.

        An existing entry with the same name is replaced.

        Args:
            name: Name of the strategy
            strategy: RoutingStrategy instance
        """
        registry = cls._routing_strategies
        registry[name] = strategy

    @classmethod
    def get_available_strategies(cls) -> list[str]:
        """
        List the names of all registered routing strategies.

        Returns:
            List of available strategy names
        """
        return [*cls._routing_strategies.keys()]

    @staticmethod
    def simulate_routing(
        hidden_states: torch.Tensor,
        router_logits: torch.Tensor,
        strategy_name: str,
        top_k: int,
        indices_type: torch.dtype | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Route tokens to experts with the named strategy.

        Args:
            hidden_states: Input hidden states [num_tokens, hidden_size]
            router_logits: Router logits [num_tokens, num_experts]
            strategy_name: Name of the routing strategy to use
            top_k: Number of experts to select per token
            indices_type: Data type for expert indices

        Returns:
            tuple of (topk_weights, topk_ids)

        Raises:
            ValueError: If ``strategy_name`` is not registered.
        """
        registry = RoutingSimulator._routing_strategies

        if strategy_name not in registry:
            known = list(registry.keys())
            raise ValueError(
                f"Unknown routing strategy: {strategy_name}. "
                f"Available strategies: "
                f"{known}"
            )

        # Simulated routing ignores the real router, so warn that outputs
        # are not meaningful.
        logger.warning_once(
            "Simulating MoE routing using a %s strategy. "
            "This should only be used for performance testing. "
            "Model outputs will not be valid.",
            strategy_name,
        )

        chosen = registry[strategy_name]
        return chosen.route_tokens(
            hidden_states=hidden_states,
            router_logits=router_logits,
            top_k=top_k,
            indices_type=indices_type,
        )

get_available_strategies() classmethod

Get list of available routing strategy names.

Returns:

  • list[str]

    List of available strategy names

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
@classmethod
def get_available_strategies(cls) -> list[str]:
    """
    List the names of all registered routing strategies.

    Returns:
        List of available strategy names
    """
    registered_names = cls._routing_strategies.keys()
    return [*registered_names]

register_strategy(name, strategy) classmethod

Register a custom routing strategy.

Parameters:

  • name

    (str) –

    Name of the strategy

  • strategy

    (RoutingStrategy) –

    RoutingStrategy instance

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
@classmethod
def register_strategy(cls, name: str, strategy: RoutingStrategy):
    """
    Add a custom routing strategy to the class-level registry.

    An existing entry with the same name is replaced.

    Args:
        name: Name of the strategy
        strategy: RoutingStrategy instance
    """
    registry = cls._routing_strategies
    registry[name] = strategy

simulate_routing(hidden_states, router_logits, strategy_name, top_k, indices_type=None) staticmethod

Simulate token-to-expert routing using the specified strategy.

Parameters:

  • hidden_states

    (Tensor) –

    Input hidden states [num_tokens, hidden_size]

  • router_logits

    (Tensor) –

    Router logits [num_tokens, num_experts]

  • strategy_name

    (str) –

    Name of the routing strategy to use

  • top_k

    (int) –

    Number of experts to select per token

  • indices_type

    (dtype | None, default: None ) –

    Data type for expert indices

Returns:

  • tuple[Tensor, Tensor]

    tuple of (topk_weights, topk_ids)

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
@staticmethod
def simulate_routing(
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
    strategy_name: str,
    top_k: int,
    indices_type: torch.dtype | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Route tokens to experts with the named strategy.

    Args:
        hidden_states: Input hidden states [num_tokens, hidden_size]
        router_logits: Router logits [num_tokens, num_experts]
        strategy_name: Name of the routing strategy to use
        top_k: Number of experts to select per token
        indices_type: Data type for expert indices

    Returns:
        tuple of (topk_weights, topk_ids)

    Raises:
        ValueError: If ``strategy_name`` is not registered.
    """
    registry = RoutingSimulator._routing_strategies

    if strategy_name not in registry:
        known = list(registry.keys())
        raise ValueError(
            f"Unknown routing strategy: {strategy_name}. "
            f"Available strategies: "
            f"{known}"
        )

    # Simulated routing ignores the real router, so warn that outputs are
    # not meaningful.
    logger.warning_once(
        "Simulating MoE routing using a %s strategy. "
        "This should only be used for performance testing. "
        "Model outputs will not be valid.",
        strategy_name,
    )

    chosen = registry[strategy_name]
    return chosen.route_tokens(
        hidden_states=hidden_states,
        router_logits=router_logits,
        top_k=top_k,
        indices_type=indices_type,
    )

RoutingSimulatorRouter

Bases: BaseRouter

Router that uses routing simulation strategies for testing/debugging.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
class RoutingSimulatorRouter(BaseRouter):
    """
    Router that uses routing simulation strategies for testing/debugging.

    The strategy is selected by name through
    ``envs.VLLM_MOE_ROUTING_SIMULATION_STRATEGY`` on every routing call.
    """

    def __init__(
        self,
        top_k: int,
        global_num_experts: int,
        eplb_state: EplbLayerState | None = None,
        indices_type_getter: Callable[[], torch.dtype | None] | None = None,
    ):
        """Forward all arguments unchanged to the base router."""
        super().__init__(
            top_k=top_k,
            global_num_experts=global_num_experts,
            eplb_state=eplb_state,
            indices_type_getter=indices_type_getter,
        )

    @property
    def routing_method_type(self) -> RoutingMethodType:
        """Routing method type reported by this router (always Simulated)."""
        return RoutingMethodType.Simulated

    def _compute_routing(
        self,
        hidden_states: torch.Tensor,
        router_logits: torch.Tensor,
        indices_type: torch.dtype | None,
        *,
        input_ids: torch.Tensor | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Use routing simulator to compute routing.

        ``input_ids`` is accepted but not used.

        Returns:
            tuple of (topk_weights, topk_ids) produced by the strategy.
        """
        # Read on every call rather than cached on the instance.
        strategy_name = envs.VLLM_MOE_ROUTING_SIMULATION_STRATEGY

        weights, expert_ids = RoutingSimulator.simulate_routing(
            hidden_states=hidden_states,
            router_logits=router_logits,
            strategy_name=strategy_name,
            top_k=self.top_k,
            indices_type=indices_type,
        )
        return weights, expert_ids

_compute_routing(hidden_states, router_logits, indices_type, *, input_ids=None)

Use routing simulator to compute routing.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
def _compute_routing(
    self,
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
    indices_type: torch.dtype | None,
    *,
    input_ids: torch.Tensor | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Use routing simulator to compute routing.

    ``input_ids`` is accepted but not used.

    Returns:
        tuple of (topk_weights, topk_ids) produced by the strategy.
    """
    # Read on every call rather than cached on the instance.
    strategy_name = envs.VLLM_MOE_ROUTING_SIMULATION_STRATEGY

    weights, expert_ids = RoutingSimulator.simulate_routing(
        hidden_states=hidden_states,
        router_logits=router_logits,
        strategy_name=strategy_name,
        top_k=self.top_k,
        indices_type=indices_type,
    )
    return weights, expert_ids

RoutingStrategy

Bases: ABC

Base class for token-to-expert routing strategies.

Methods:

  • route_tokens

    Route tokens to experts.

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
class RoutingStrategy(ABC):
    """Base class for token-to-expert routing strategies."""

    @abstractmethod
    def route_tokens(
        self,
        hidden_states: torch.Tensor,
        router_logits: torch.Tensor,
        top_k: int,
        indices_type: torch.dtype | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Route tokens to experts.

        Args:
            hidden_states: Input hidden states [num_tokens, hidden_size]
            router_logits: Router logits [num_tokens, num_experts]
            top_k: Number of experts to select per token
            indices_type: Data type for expert indices

        Returns:
            tuple of (topk_weights, topk_ids)
        """
        pass

route_tokens(hidden_states, router_logits, top_k, indices_type=None) abstractmethod

Route tokens to experts.

Parameters:

  • hidden_states

    (Tensor) –

    Input hidden states [num_tokens, hidden_size]

  • router_logits

    (Tensor) –

    Router logits [num_tokens, num_experts]

  • top_k

    (int) –

    Number of experts to select per token

  • indices_type

    (dtype | None, default: None ) –

    Data type for expert indices

Returns:

  • tuple[Tensor, Tensor]

    tuple of (topk_weights, topk_ids)

Source code in vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
@abstractmethod
def route_tokens(
    self,
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
    top_k: int,
    indices_type: torch.dtype | None = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Route tokens to experts.

    Args:
        hidden_states: Input hidden states [num_tokens, hidden_size]
        router_logits: Router logits [num_tokens, num_experts]
        top_k: Number of experts to select per token
        indices_type: Data type for expert indices

    Returns:
        tuple of (topk_weights, topk_ids)
    """
    pass