💪 InferenceService name updated
#8
by
ckavili
- opened
README.md
CHANGED
|
@@ -175,7 +175,7 @@ metadata:
|
|
| 175 |
annotations:
|
| 176 |
openshift.io/display-name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # OPTIONAL CHANGE
|
| 177 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 178 |
- name:
|
| 179 |
labels:
|
| 180 |
opendatahub.io/dashboard: 'true'
|
| 181 |
spec:
|
|
@@ -213,7 +213,7 @@ spec:
|
|
| 213 |
oc apply -f vllm-servingruntime.yaml
|
| 214 |
|
| 215 |
# Apply the InferenceService
|
| 216 |
- oc apply -f
|
| 217 |
```
|
| 218 |
|
| 219 |
```python
|
|
@@ -224,7 +224,7 @@ oc apply -f qwen-inferenceservice.yaml
|
|
| 224 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
| 225 |
-H "Content-Type: application/json" \
|
| 226 |
-d '{
|
| 227 |
- "model": "
|
| 228 |
"stream": true,
|
| 229 |
"stream_options": {
|
| 230 |
"include_usage": true
|
|
|
|
| 175 |
annotations:
|
| 176 |
openshift.io/display-name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # OPTIONAL CHANGE
|
| 177 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 178 |
+ name: llama-4-scout-17b-16e-instruct-fp8-dynamic # specify model name. This value will be used to invoke the model in the payload
|
| 179 |
labels:
|
| 180 |
opendatahub.io/dashboard: 'true'
|
| 181 |
spec:
|
|
|
|
| 213 |
oc apply -f vllm-servingruntime.yaml
|
| 214 |
|
| 215 |
# Apply the InferenceService
|
| 216 |
+ oc apply -f llama4-scout-inferenceservice.yaml
|
| 217 |
```
|
| 218 |
|
| 219 |
```python
|
|
|
|
| 224 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
| 225 |
-H "Content-Type: application/json" \
|
| 226 |
-d '{
|
| 227 |
+ "model": "llama-4-scout-17b-16e-instruct-fp8-dynamic",
|
| 228 |
"stream": true,
|
| 229 |
"stream_options": {
|
| 230 |
"include_usage": true
|