xly committed
Commit: a75cfdd
Parent(s): faf48c3
fix offloading dir
src/backend/hflm_with_measurement.py
CHANGED
@@ -1,6 +1,7 @@
 import copy
 import os
 from datetime import timedelta
+import sys
 from time import time
 from pathlib import Path
 from typing import List, Literal, Optional, Tuple, Union
src/backend/moe_infinity.py
CHANGED
@@ -1,5 +1,6 @@
 import torch
 import os
+import shutil
 from transformers import AutoTokenizer
 from transformers import AutoModelForCausalLM
 from moe_infinity import MoE
@@ -34,6 +35,11 @@ class MoEHFLM(HFLMWithMeasurement):
             *args, **kwargs, pretrained=pretrained, device_map="cuda:0"
         )  # Assuming HFLM accepts a 'pretrained' arg and handles it
         # self._create_model()
+        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
+
+    def __del__(self):
+        # Clean up offloaded models from self.offload_path
+        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
 
     def _create_model(self, *args, **kwargs):
         """
@@ -46,7 +52,18 @@ class MoEHFLM(HFLMWithMeasurement):
         }
         # Update default config with any user-provided config
        final_moe_config = {**default_moe_config, **self.moe_config}
+
+        # dirty fix, to be removed when MoE-infinity supports move input to correct device
+        def MoEGenDecorator(func):
+            def wrapper(*args, **kwargs):
+                # Ensure all tensor in the input are in the same device as the model
+                args = [arg.to("cuda:0") if isinstance(arg, torch.Tensor) else arg for arg in args]
+                kwargs = {k: v.to("cuda:0") if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
+                return func(*args, **kwargs)
+            return wrapper
+
         self._model = MoE(self.checkpoint, final_moe_config)
+        self._model.generate = MoEGenDecorator(self._model.generate)
         # self._model = AutoModelForCausalLM.from_pretrained(
         #     self.checkpoint, torch_dtype=torch.float16, device_map="auto"
         # )
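The commit introduces two patterns: removing the "moe-infinity-offloads" directory when the wrapper is torn down, and wrapping generate() so input tensors are moved onto the model's device. Below is a minimal, self-contained sketch of both, under stated assumptions: the names OffloadedModelWrapper, move_inputs_to_device, and _cleanup_offload_dir, and the offload_path default, are illustrative and not taken from the repository, and the os.path.exists() guard is an addition here (the commit calls shutil.rmtree unconditionally).

import os
import shutil
import torch


def move_inputs_to_device(func, device="cuda:0"):
    # Wrap a generate-style callable so any tensor arguments are moved to `device`,
    # mirroring the MoEGenDecorator workaround in the commit.
    def wrapper(*args, **kwargs):
        args = [a.to(device) if isinstance(a, torch.Tensor) else a for a in args]
        kwargs = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                  for k, v in kwargs.items()}
        return func(*args, **kwargs)
    return wrapper


class OffloadedModelWrapper:
    # Hypothetical stand-in for MoEHFLM, showing only the offload-cleanup pattern.
    def __init__(self, offload_path="./offload"):
        self.offload_path = offload_path

    def _cleanup_offload_dir(self):
        target = os.path.join(self.offload_path, "moe-infinity-offloads")
        # Guarding with exists() avoids FileNotFoundError when the directory
        # was never created; the commit removes it unconditionally.
        if os.path.exists(target):
            shutil.rmtree(target)

    def __del__(self):
        # Clean up offloaded weights when the wrapper is garbage-collected.
        self._cleanup_offload_dir()

Wrapping generate() rather than patching MoE-Infinity itself keeps the device workaround local and easy to delete once the library moves inputs to the correct device on its own, which the in-code comment says is the intent.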