Skip to content

Runtime Plan API

The runtime plan captures backend selection, support level, and specialization state before execution begins.

Bases: str, Enum

Describe whether specialization is absent, planned, applied, or replaced by fallback.

Source code in src/ollm/runtime/plan.py
13
14
15
16
17
18
19
class SpecializationState(str, Enum):
    """Describe whether specialization is absent, planned, applied, or replaced by fallback.

    Mixes in ``str`` so members compare equal to their serialized string
    values and can be emitted directly in JSON payloads.
    """

    NOT_PLANNED = "not-planned"  # specialization is not part of this plan
    PLANNED = "planned"  # specialization is scheduled but has not run yet
    APPLIED = "applied"  # specialization passes have been applied
    FALLBACK = "fallback"  # specialization failed; a fallback path replaced it

Describe how oLLM intends to execute a resolved model reference.

Attributes:

Name Type Description
resolved_model ResolvedModel

Final resolved model metadata for the plan.

backend_id str | None

Selected backend identifier when the plan is executable.

model_path Path | None

Local materialized model path when one exists.

support_level SupportLevel

Planned support level.

generic_model_kind GenericModelKind | None

Generic execution family when one applies.

supports_disk_cache bool

Whether the selected backend supports disk KV cache behavior.

supports_cpu_offload bool

Whether CPU offload controls are supported.

supports_gpu_offload bool

Whether GPU offload controls are supported.

specialization_enabled bool

Whether specialization is enabled for the current request.

specialization_applied bool

Whether specialization has already been applied.

specialization_provider_id str | None

Matching specialization provider identifier.

specialization_state SpecializationState

Current specialization lifecycle state.

reason str

Human-readable plan summary.

specialization_pass_ids tuple[SpecializationPassId, ...]

Planned specialization passes.

applied_specialization_pass_ids tuple[SpecializationPassId, ...]

Applied specialization passes.

fallback_reason str | None

Fallback reason when specialization failed.

details dict[str, str]

Extra serialized inspection details.

Source code in src/ollm/runtime/plan.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
@dataclass(frozen=True, slots=True)
class RuntimePlan:
    """Describe how oLLM intends to execute a resolved model reference.

    Attributes:
        resolved_model (ResolvedModel): Final resolved model metadata for the
            plan.
        backend_id (str | None): Identifier of the selected backend, or
            ``None`` when the plan is not executable.
        model_path (Path | None): Locally materialized model path, when one
            exists.
        support_level (SupportLevel): Support level planned for execution.
        generic_model_kind (GenericModelKind | None): Generic execution
            family, when one applies.
        supports_disk_cache (bool): Whether the selected backend supports
            disk KV cache behavior.
        supports_cpu_offload (bool): Whether CPU offload controls are
            supported.
        supports_gpu_offload (bool): Whether GPU offload controls are
            supported.
        specialization_enabled (bool): Whether specialization is enabled for
            the current request.
        specialization_applied (bool): Whether specialization has already
            been applied.
        specialization_provider_id (str | None): Identifier of the matching
            specialization provider.
        specialization_state (SpecializationState): Current specialization
            lifecycle state.
        reason (str): Human-readable plan summary.
        specialization_pass_ids (tuple[SpecializationPassId, ...]): Planned
            specialization passes.
        applied_specialization_pass_ids (tuple[SpecializationPassId, ...]):
            Specialization passes that were actually applied.
        fallback_reason (str | None): Reason for fallback when specialization
            failed.
        details (dict[str, str]): Extra serialized inspection details.
    """

    resolved_model: ResolvedModel
    backend_id: str | None
    model_path: Path | None
    support_level: SupportLevel
    generic_model_kind: GenericModelKind | None
    supports_disk_cache: bool
    supports_cpu_offload: bool
    supports_gpu_offload: bool
    specialization_enabled: bool
    specialization_applied: bool
    specialization_provider_id: str | None
    specialization_state: SpecializationState
    reason: str
    specialization_pass_ids: tuple[SpecializationPassId, ...] = ()
    applied_specialization_pass_ids: tuple[SpecializationPassId, ...] = ()
    fallback_reason: str | None = None
    details: dict[str, str] = field(default_factory=dict)

    def is_executable(self) -> bool:
        """Report whether the plan resolved to a runnable backend.

        Returns:
            bool: ``True`` when a backend ID was selected.
        """
        has_backend = self.backend_id is not None
        return has_backend

    def as_dict(self) -> dict[str, object]:
        """Return a JSON-serializable representation of the runtime plan.

        Returns:
            dict[str, object]: Serialized runtime plan payload.
        """
        # Flatten optional/enum-valued fields up front so the payload
        # literal below stays a plain key-to-value mapping.
        plain_path = None if self.model_path is None else str(self.model_path)
        plain_kind = (
            None
            if self.generic_model_kind is None
            else self.generic_model_kind.value
        )
        planned_passes = [p.value for p in self.specialization_pass_ids]
        applied_passes = [p.value for p in self.applied_specialization_pass_ids]
        return {
            "backend_id": self.backend_id,
            "model_path": plain_path,
            "support_level": self.support_level.value,
            "generic_model_kind": plain_kind,
            "supports_disk_cache": self.supports_disk_cache,
            "supports_cpu_offload": self.supports_cpu_offload,
            "supports_gpu_offload": self.supports_gpu_offload,
            "specialization_enabled": self.specialization_enabled,
            "specialization_applied": self.specialization_applied,
            "specialization_provider_id": self.specialization_provider_id,
            "specialization_state": self.specialization_state.value,
            "specialization_pass_ids": planned_passes,
            "applied_specialization_pass_ids": applied_passes,
            "fallback_reason": self.fallback_reason,
            "reason": self.reason,
            # Copy so callers cannot mutate the plan's own mapping.
            "details": dict(self.details),
        }

is_executable

is_executable() -> bool

Return whether the plan resolved to a runnable backend.

Returns:

Name Type Description
bool bool

True when a backend ID was selected.

Source code in src/ollm/runtime/plan.py
74
75
76
77
78
79
80
def is_executable(self) -> bool:
    """Report whether the plan resolved to a runnable backend.

    Returns:
        bool: ``True`` when a backend ID was selected.
    """
    has_backend = self.backend_id is not None
    return has_backend

as_dict

as_dict() -> dict[str, object]

Return a JSON-serializable representation of the runtime plan.

Returns:

Type Description
dict[str, object]

dict[str, object]: Serialized runtime plan payload.

Source code in src/ollm/runtime/plan.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def as_dict(self) -> dict[str, object]:
    """Return a JSON-serializable representation of the runtime plan.

    Returns:
        dict[str, object]: Serialized runtime plan payload.
    """
    # Flatten optional/enum-valued fields up front so the payload
    # literal below stays a plain key-to-value mapping.
    plain_path = None if self.model_path is None else str(self.model_path)
    plain_kind = (
        None
        if self.generic_model_kind is None
        else self.generic_model_kind.value
    )
    planned_passes = [p.value for p in self.specialization_pass_ids]
    applied_passes = [p.value for p in self.applied_specialization_pass_ids]
    return {
        "backend_id": self.backend_id,
        "model_path": plain_path,
        "support_level": self.support_level.value,
        "generic_model_kind": plain_kind,
        "supports_disk_cache": self.supports_disk_cache,
        "supports_cpu_offload": self.supports_cpu_offload,
        "supports_gpu_offload": self.supports_gpu_offload,
        "specialization_enabled": self.specialization_enabled,
        "specialization_applied": self.specialization_applied,
        "specialization_provider_id": self.specialization_provider_id,
        "specialization_state": self.specialization_state.value,
        "specialization_pass_ids": planned_passes,
        "applied_specialization_pass_ids": applied_passes,
        "fallback_reason": self.fallback_reason,
        "reason": self.reason,
        # Copy so callers cannot mutate the plan's own mapping.
        "details": dict(self.details),
    }