💪 InferenceService name updated
#8
by
ckavili
- opened
README.md
CHANGED
|
@@ -175,7 +175,7 @@ metadata:
|
|
| 175 |
annotations:
|
| 176 |
openshift.io/display-name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # OPTIONAL CHANGE
|
| 177 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 178 |
- name:
|
| 179 |
labels:
|
| 180 |
opendatahub.io/dashboard: 'true'
|
| 181 |
spec:
|
|
@@ -213,7 +213,7 @@ spec:
|
|
| 213 |
oc apply -f vllm-servingruntime.yaml
|
| 214 |
|
| 215 |
# Apply the InferenceService
|
| 216 |
- oc apply -f
|
| 217 |
```
|
| 218 |
|
| 219 |
```python
|
|
@@ -224,7 +224,7 @@ oc apply -f qwen-inferenceservice.yaml
|
|
| 224 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
| 225 |
-H "Content-Type: application/json" \
|
| 226 |
-d '{
|
| 227 |
- "model": "
|
| 228 |
"stream": true,
|
| 229 |
"stream_options": {
|
| 230 |
"include_usage": true
|
|
|
|
| 175 |
annotations:
|
| 176 |
openshift.io/display-name: Llama-4-Scout-17B-16E-Instruct-FP8-dynamic # OPTIONAL CHANGE
|
| 177 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 178 |
+ name: llama-4-scout-17b-16e-instruct-fp8-dynamic # specify model name. This value will be used to invoke the model in the payload
|
| 179 |
labels:
|
| 180 |
opendatahub.io/dashboard: 'true'
|
| 181 |
spec:
|
|
|
|
| 213 |
oc apply -f vllm-servingruntime.yaml
|
| 214 |
|
| 215 |
# Apply the InferenceService
|
| 216 |
+ oc apply -f llama4-scout-inferenceservice.yaml
|
| 217 |
```
|
| 218 |
|
| 219 |
```python
|
|
|
|
| 224 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
| 225 |
-H "Content-Type: application/json" \
|
| 226 |
-d '{
|
| 227 |
+ "model": "llama-4-scout-17b-16e-instruct-fp8-dynamic",
|
| 228 |
"stream": true,
|
| 229 |
"stream_options": {
|
| 230 |
"include_usage": true
|