chendren committed on
Commit bc776cd · verified · 1 Parent(s): 6541baf

Upload inference_api_example.py with huggingface_hub
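
For reference, an upload like this can be scripted with huggingface_hub's HfApi.upload_file. The sketch below is an assumption about how the file may have been pushed, not the exact command used for this commit; the repo_id is taken from the API_URL inside the uploaded script, and authentication is left to a prior huggingface-cli login.

# Hypothetical upload sketch (assumes you are already authenticated, e.g. via `huggingface-cli login`)
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="inference_api_example.py",  # local file to push
    path_in_repo="inference_api_example.py",     # destination path in the repo
    repo_id="chendren/deepseek-dnd-lora",        # model repo targeted by this commit
    commit_message="Upload inference_api_example.py with huggingface_hub",
)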

Files changed (1)
1. inference_api_example.py  +72 -0
inference_api_example.py ADDED
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# Example script for using the Hugging Face Inference API with the D&D model
+# This approach will track download metrics on Hugging Face
+
+import requests
+import json
+import time
+
+# Configuration
+API_URL = "https://api-inference.huggingface.co/models/chendren/deepseek-dnd-lora"
+headers = {
+    "Content-Type": "application/json",
+    # Replace with your Hugging Face API token
+    "Authorization": "Bearer YOUR_API_TOKEN"
+}
+
+# Test prompts
+test_prompts = [
+    "Create a D&D character with the following details: Race: Half-Elf, Class: Bard, Background: Entertainer",
+    "Design a D&D adventure hook set in a dark forest",
+    "Create a magical item for D&D 5e that would be suitable for a level 5 rogue",
+    "Write a description for a fantasy tavern in a D&D setting"
+]
+
+def query(payload):
+    """
+    Send a query to the Hugging Face API
+    """
+    response = requests.post(API_URL, headers=headers, json=payload)
+
+    if response.status_code == 503:
+        # Model is loading
+        print("Model is loading. Waiting...")
+        time.sleep(20)
+        return query(payload)
+
+    return response.json()
+
+# Run tests
+for i, prompt in enumerate(test_prompts):
+    print(f"\n==== Test Prompt {i+1} ====")
+    print(prompt)
+    print("\n==== Response ====")
+
+    # Add a system prompt to help guide the model
+    full_prompt = f"You are a Dungeons & Dragons assistant. {prompt}"
+
+    # Make the API request - this will be tracked by HF Hub
+    payload = {
+        "inputs": full_prompt,
+        "parameters": {
+            "max_new_tokens": 500,
+            "temperature": 0.7,
+            "top_p": 0.9,
+            "top_k": 50,
+            "repetition_penalty": 1.1,
+            "do_sample": True
+        }
+    }
+
+    try:
+        result = query(payload)
+        print(json.dumps(result, indent=2))
+    except Exception as e:
+        print(f"Error: {e}")
+
+    print("\n" + "="*50)
+
+    # Wait a bit between requests to avoid rate limiting
+    time.sleep(3)
+
+print("\nTesting complete!")