@@ -100,23 +100,24 @@ type Flavor struct {
100100 Name FlavorName `json:"name"`
101101 // Requests defines the required accelerators to serve the model for each replica,
102102 // like <nvidia.com/gpu: 8>. For multi-host cases, the requests here indicate
103- // the resource requirements for each replica. This may change in the future .
103+ // the resource requirements for each replica, which are usually equal to the TP size.
104104 // Not recommended to set the cpu and memory usage here:
105105 // - if using playground, you can define the cpu/mem usage at backendConfig.
106106 // - if using inference service, you can define the cpu/mem at the container resources.
107107 // However, if you define the same accelerator requests at playground/service as well,
108- // the requests here will be covered .
108+ // the requests defined there will be overwritten by the flavor requests.
109109 // +optional
110110 Requests v1.ResourceList `json:"requests,omitempty"`
111111 // NodeSelector represents the node candidates for Pod placements, if a node doesn't
112112 // meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin.
113113 // If nodeSelector is empty, it means every node is a candidate.
114114 // +optional
115115 NodeSelector map [string ]string `json:"nodeSelector,omitempty"`
116- // Params stores other useful parameters and will be consumed by the autoscaling components
117- // like cluster-autoscaler, Karpenter.
118- // E.g. when scaling up nodes with 8x Nvidia A00, the parameter can be injected with
119- // instance-type: p4d.24xlarge for AWS.
116+ // Params stores other useful parameters. They are either consumed by cluster-autoscaler / Karpenter
117+ // for autoscaling, or used to define model parallelism parameters like the TP or PP size.
118+ // E.g. with autoscaling, when scaling up nodes with 8x Nvidia A100, the parameter can be injected
119+ // with <INSTANCE-TYPE: p4d.24xlarge> for AWS.
120+ // Preset parameters: TP, PP, INSTANCE-TYPE.
120121 // +optional
121122 Params map [string ]string `json:"params,omitempty"`
122123}
0 commit comments