rphrp1985 committed
Commit 861a731 · verified · 1 Parent(s): 9bfe2be

Update app.py

Files changed (1)
  1. app.py +34 -34
app.py CHANGED
@@ -49,58 +49,58 @@ import transformers

 # model_id = "mistralai/Mistral-7B-v0.3"

-# model_id = "microsoft/Phi-3-medium-4k-instruct"
+model_id = "microsoft/Phi-3-medium-4k-instruct"
 # model_id = "microsoft/phi-4"

-# # model_id = "Qwen/Qwen2-7B-Instruct"
+# model_id = "Qwen/Qwen2-7B-Instruct"


-# tokenizer = AutoTokenizer.from_pretrained(
-#     # model_id
-#     model_id,
-#     # use_fast=False
-#     token= token,
-#     trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    # model_id
+    model_id,
+    # use_fast=False
+    token= token,
+    trust_remote_code=True)


-# accelerator = Accelerator()
+accelerator = Accelerator()

-# model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
-#     # torch_dtype= torch.uint8,
-#     torch_dtype=torch.bfloat16,
-#     # load_in_8bit=True,
-#     # # # torch_dtype=torch.fl,
-#     attn_implementation="flash_attention_2",
-#     low_cpu_mem_usage=True,
-#     trust_remote_code=True,
-#     device_map='cuda',
-#     # device_map=accelerator.device_map,
+model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
+    # torch_dtype= torch.uint8,
+    torch_dtype=torch.bfloat16,
+    # load_in_8bit=True,
+    # # # torch_dtype=torch.fl,
+    attn_implementation="flash_attention_2",
+    low_cpu_mem_usage=True,
+    trust_remote_code=True,
+    device_map='cuda',
+    # device_map=accelerator.device_map,

-# )
+)





-# #
-# model = accelerator.prepare(model)
-# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+#
+model = accelerator.prepare(model)
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

-# pipe = pipeline(
-#     "text-generation",
-#     model=model,
-#     tokenizer=tokenizer,
-# )
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)




-pipeline = transformers.pipeline(
-    "text-generation",
-    model="microsoft/phi-4",
-    model_kwargs={"torch_dtype": "auto"},
-    device_map="auto",
-)
+# pipeline = transformers.pipeline(
+#     "text-generation",
+#     model="microsoft/phi-4",
+#     model_kwargs={"torch_dtype": "auto"},
+#     device_map="auto",
+# )


 # device_map = infer_auto_device_map(model, max_memory={0: "79GB", "cpu":"65GB" })
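
For context, a minimal self-contained sketch of the loading path this commit switches to: an explicit tokenizer/model load for microsoft/Phi-3-medium-4k-instruct in bfloat16, wrapped by Accelerate and fed into a text-generation pipeline, rather than handing a model id string to transformers.pipeline. This is only an illustration of the pattern, not the full app.py: the token value and the prompt are placeholders (app.py defines its own token elsewhere), and flash_attention_2 with bfloat16 assumes flash-attn is installed and a supported GPU is available.

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "microsoft/Phi-3-medium-4k-instruct"
token = "hf_xxx"  # placeholder: app.py reads its Hugging Face token elsewhere

# Load tokenizer and model explicitly instead of passing a model id to pipeline().
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    torch_dtype=torch.bfloat16,               # half-precision weights to fit GPU memory
    attn_implementation="flash_attention_2",  # requires flash-attn and a supported GPU
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map="cuda",
)

# Mirrors the commit: wrap the model for Accelerate-managed execution.
accelerator = Accelerator()
model = accelerator.prepare(model)

# Build the text-generation pipeline from the already-loaded objects.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("Hello", max_new_tokens=32)[0]["generated_text"])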