jennyyyi committed on
Commit
2a510a1
·
verified ·
1 Parent(s): 1bc580a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -4
README.md CHANGED
@@ -88,7 +88,7 @@ ilab model download --repository docker://registry.redhat.io/rhelai1/granite-3-1
88
 
89
  ```bash
90
  # Serve model via ilab
91
- ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct
92
 
93
  # Chat with model
94
  ilab model chat --model ~/.cache/instructlab/models/granite-3-1-8b-instruct
@@ -145,9 +145,9 @@ apiVersion: serving.kserve.io/v1beta1
145
  kind: InferenceService
146
  metadata:
147
  annotations:
148
- openshift.io/display-name: RedHatAI/granite-3.1-8b-instruct # OPTIONAL CHANGE
149
  serving.kserve.io/deploymentMode: RawDeployment
150
- name: RedHatAI/granite-3.1-8b-instruct # specify model name. This value will be used to invoke the model in the payload
151
  labels:
152
  opendatahub.io/dashboard: 'true'
153
  spec:
@@ -155,6 +155,8 @@ spec:
155
  maxReplicas: 1
156
  minReplicas: 1
157
  model:
 
 
158
  modelFormat:
159
  name: vLLM
160
  name: ''
@@ -196,7 +198,7 @@ oc apply -f qwen-inferenceservice.yaml
196
  curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
197
  -H "Content-Type: application/json" \
198
  -d '{
199
- "model": "RedHatAI/granite-3.1-8b-instruct",
200
  "stream": true,
201
  "stream_options": {
202
  "include_usage": true
 
88
 
89
  ```bash
90
  # Serve model via ilab
91
+ ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct -- --trust-remote-code
92
 
93
  # Chat with model
94
  ilab model chat --model ~/.cache/instructlab/models/granite-3-1-8b-instruct
 
145
  kind: InferenceService
146
  metadata:
147
  annotations:
148
+ openshift.io/display-name: granite-3-1-8b-instruct # OPTIONAL CHANGE
149
  serving.kserve.io/deploymentMode: RawDeployment
150
+ name: granite-3-1-8b-instruct # specify model name. This value will be used to invoke the model in the payload
151
  labels:
152
  opendatahub.io/dashboard: 'true'
153
  spec:
 
155
  maxReplicas: 1
156
  minReplicas: 1
157
  model:
158
+ args:
159
+ - '--trust-remote-code'
160
  modelFormat:
161
  name: vLLM
162
  name: ''
 
198
  curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
199
  -H "Content-Type: application/json" \
200
  -d '{
201
+ "model": "granite-3-1-8b-instruct",
202
  "stream": true,
203
  "stream_options": {
204
  "include_usage": true