πŸͺ„ InferenceService name updated

#8 by ckavili - opened
Files changed (1)
  1. README.md +3 -3
README.md CHANGED
@@ -175,7 +175,7 @@ metadata:
   annotations:
     openshift.io/display-name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # specify model name. This value will be used to invoke the model in the payload
+  name: llama-4-scout-17b-16e-instruct-fp8-dynamic # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:
@@ -213,7 +213,7 @@ spec:
 oc apply -f vllm-servingruntime.yaml
 
 # Apply the InferenceService
-oc apply -f qwen-inferenceservice.yaml
+oc apply -f llama4-scout-inferenceservice.yaml
 ```
 
 ```python
@@ -224,7 +224,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "Llama-4-Scout-17B-16E-Instruct-FP8-dynamic",
+    "model": "llama-4-scout-17b-16e-instruct-fp8-dynamic",
     "stream": true,
     "stream_options": {
       "include_usage": true
 