Update README.md
README.md CHANGED
@@ -131,15 +131,15 @@ vllm serve \
 ```bash
 # Download model from Red Hat Registry via docker
 # Note: This downloads the model to ~/.cache/instructlab/models unless --model-dir is specified.
-ilab model download --repository docker://registry.redhat.io/rhelai1/mistral-small-3-1-24b-instruct-2503:1.5
+ilab model download --repository docker://registry.redhat.io/rhelai1/mistral-small-3-1-24b-instruct-2503-fp8-dynamic:1.5
 ```

 ```bash
 # Serve model via ilab
-ilab model serve --model-path ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503
+ilab model serve --model-path ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503-fp8-dynamic

 # Chat with model
-ilab model chat --model ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503
+ilab model chat --model ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503-fp8-dynamic
 ```
 See [Red Hat Enterprise Linux AI documentation](https://docs.redhat.com/en/documentation/red_hat_enterprise_linux_ai/1.4) for more details.
 </details>
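Once `ilab model serve` is running, the model can also be queried directly over its OpenAI-compatible API. The sketch below assumes the stock InstructLab default endpoint of `127.0.0.1:8000` (controlled by `serve.host_port` in `config.yaml`); the served model name may be registered as a full path rather than the bare directory name, so it is worth listing models first:

```bash
# List the served model name first; it may be the full --model-path rather
# than the bare directory name (the name used below is an assumption).
curl http://127.0.0.1:8000/v1/models

# Send a chat completion using the name reported above.
curl http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mistral-small-3-1-24b-instruct-2503-fp8-dynamic",
    "messages": [{"role": "user", "content": "What is the capital of France?"}]
  }'
```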
@@ -193,9 +193,9 @@ apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
   annotations:
-    openshift.io/display-name:
+    openshift.io/display-name: mistral-small-3-1-24b-instruct-2503-fp8-dynamic # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name:
+  name: mistral-small-3-1-24b-instruct-2503-fp8-dynamic # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:
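The `metadata.name` set in this hunk is the value later passed as `"model"` in the request payload. As a quick check after applying the manifest (a sketch, reusing the `qwen-inferenceservice.yaml` file name from the `oc apply` step shown below), the service should report a URL and `READY=True`:

```bash
# Apply the InferenceService manifest; file name reused from the oc apply
# step later in this README.
oc apply -f qwen-inferenceservice.yaml

# READY should become True and URL should be populated once vLLM has started.
oc get inferenceservice mistral-small-3-1-24b-instruct-2503-fp8-dynamic
```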
@@ -244,7 +244,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "
+    "model": "mistral-small-3-1-24b-instruct-2503-fp8-dynamic",
     "stream": true,
     "stream_options": {
       "include_usage": true
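The hunk above is cut off by the diff context, so the payload ends mid-object. For orientation, a complete streaming request would look roughly like the following sketch; the `messages` and `max_tokens` fields are assumptions, since the diff does not show the rest of the README's payload:

```bash
# Hedged sketch of the full request; everything after "stream_options" is an
# assumption about README content not visible in this diff.
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mistral-small-3-1-24b-instruct-2503-fp8-dynamic",
    "stream": true,
    "stream_options": {
      "include_usage": true
    },
    "max_tokens": 64,
    "messages": [{"role": "user", "content": "What is the capital of France?"}]
  }'
```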