alexkueck committed on
Commit
a53f544
1 Parent(s): f179d98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -26
app.py CHANGED
@@ -352,32 +352,8 @@ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperatu
352
  "inputs": prompt,
353
  "options": {"max_new_tokens": max_new_tokens},
354
  }
355
- ##############################################
356
- #Inference Endpoint
357
- ##############################################
358
- endpoint = create_inference_endpoint(
359
- "smaug-72b-v0-1-bmw",
360
- repository="abacusai/Smaug-72B-v0.1",
361
- framework="pytorch",
362
- task="text-generation",
363
- accelerator="gpu",
364
- vendor="aws",
365
- region="us-east-1",
366
- type="protected",
367
- instance_size="medium",
368
- instance_type="g5.2xlarge",
369
- custom_image={
370
- "health_route": "/health",
371
- "env": {
372
- "MAX_BATCH_PREFILL_TOKENS": "2048",
373
- "MAX_INPUT_LENGTH": "1024",
374
- "MAX_TOTAL_TOKENS": "1512",
375
- "MODEL_ID": "/repository"
376
- },
377
- "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
378
- },
379
- )
380
- response= endpoint.client.text_generation(prompt) #requests.post(API_URL, headers=HEADERS, json=data)
381
  if response != None:
382
  result = response.json()
383
  print("result:------------------")
 
352
  "inputs": prompt,
353
  "options": {"max_new_tokens": max_new_tokens},
354
  }
355
+
356
+ response= requests.post(API_URL, headers=HEADERS, json=data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  if response != None:
358
  result = response.json()
359
  print("result:------------------")