Update README.md
Browse files
README.md
CHANGED
@@ -172,9 +172,9 @@ apiVersion: serving.kserve.io/v1beta1
|
|
172 |
kind: InferenceService
|
173 |
metadata:
|
174 |
annotations:
|
175 |
-
openshift.io/display-name:
|
176 |
serving.kserve.io/deploymentMode: RawDeployment
|
177 |
-
name:
|
178 |
labels:
|
179 |
opendatahub.io/dashboard: 'true'
|
180 |
spec:
|
@@ -223,7 +223,7 @@ oc apply -f qwen-inferenceservice.yaml
|
|
223 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
224 |
-H "Content-Type: application/json" \
|
225 |
-d '{
|
226 |
-
"model": "
|
227 |
"stream": true,
|
228 |
"stream_options": {
|
229 |
"include_usage": true
|
|
|
172 |
kind: InferenceService
|
173 |
metadata:
|
174 |
annotations:
|
175 |
+
openshift.io/display-name: llama-3-3-70b-instruct-fp8-dynamic # OPTIONAL CHANGE
|
176 |
serving.kserve.io/deploymentMode: RawDeployment
|
177 |
+
name: llama-3-3-70b-instruct-fp8-dynamic # Specify the model name; this value must be passed as "model" in the request payload when invoking the model
|
178 |
labels:
|
179 |
opendatahub.io/dashboard: 'true'
|
180 |
spec:
|
|
|
223 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions
|
224 |
-H "Content-Type: application/json" \
|
225 |
-d '{
|
226 |
+
"model": "llama-3-3-70b-instruct-fp8-dynamic",
|
227 |
"stream": true,
|
228 |
"stream_options": {
|
229 |
"include_usage": true
|