Spaces:
Paused
Paused
Commit
·
ac802d5
1
Parent(s):
cd8c6a9
changes to work
Browse files- .gitignore +1 -1
- configs/unet/first_stage.yaml +1 -3
- configs/unet/second_stage.yaml +1 -3
- soundimage/models/motion_module.py +0 -4
- soundimage/utils/image_processor.py +1 -1
- soundimage/utils/mask.png +3 -0
.gitignore
CHANGED
|
@@ -40,6 +40,6 @@ wandb/
|
|
| 40 |
*.jpeg
|
| 41 |
*.csv
|
| 42 |
|
| 43 |
-
!/
|
| 44 |
/checkpoints/
|
| 45 |
!/assets/*
|
|
|
|
| 40 |
*.jpeg
|
| 41 |
*.csv
|
| 42 |
|
| 43 |
+
!/soundimage/utils/mask.png
|
| 44 |
/checkpoints/
|
| 45 |
!/assets/*
|
configs/unet/first_stage.yaml
CHANGED
|
@@ -83,9 +83,7 @@ model:
|
|
| 83 |
unet_use_cross_frame_attention: false
|
| 84 |
unet_use_temporal_attention: false
|
| 85 |
|
| 86 |
-
|
| 87 |
-
# When we started the project, we used the codebase of AnimateDiff and tried the motion module, but the results were poor
|
| 88 |
-
# We decided to leave the code here for possible future usage
|
| 89 |
use_motion_module: false
|
| 90 |
motion_module_resolutions: [1, 2, 4, 8]
|
| 91 |
motion_module_mid_block: false
|
|
|
|
| 83 |
unet_use_cross_frame_attention: false
|
| 84 |
unet_use_temporal_attention: false
|
| 85 |
|
| 86 |
+
|
|
|
|
|
|
|
| 87 |
use_motion_module: false
|
| 88 |
motion_module_resolutions: [1, 2, 4, 8]
|
| 89 |
motion_module_mid_block: false
|
configs/unet/second_stage.yaml
CHANGED
|
@@ -83,9 +83,7 @@ model:
|
|
| 83 |
unet_use_cross_frame_attention: false
|
| 84 |
unet_use_temporal_attention: false
|
| 85 |
|
| 86 |
-
|
| 87 |
-
# When we started the project, we used the codebase of AnimateDiff and tried the motion module, but the results were poor
|
| 88 |
-
# We decided to leave the code here for possible future usage
|
| 89 |
use_motion_module: false
|
| 90 |
motion_module_resolutions: [1, 2, 4, 8]
|
| 91 |
motion_module_mid_block: false
|
|
|
|
| 83 |
unet_use_cross_frame_attention: false
|
| 84 |
unet_use_temporal_attention: false
|
| 85 |
|
| 86 |
+
|
|
|
|
|
|
|
| 87 |
use_motion_module: false
|
| 88 |
motion_module_resolutions: [1, 2, 4, 8]
|
| 89 |
motion_module_mid_block: false
|
soundimage/models/motion_module.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
| 1 |
-
# Adapted from https://github.com/guoyww/AnimateDiff/blob/main/animatediff/models/motion_module.py
|
| 2 |
|
| 3 |
-
# Actually we don't use the motion module in the final version of LatentSync
|
| 4 |
-
# When we started the project, we used the codebase of AnimateDiff and tried motion module
|
| 5 |
-
# But the results are poor, and we decided to leave the code here for possible future usage
|
| 6 |
|
| 7 |
from dataclasses import dataclass
|
| 8 |
|
|
|
|
|
|
|
| 1 |
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from dataclasses import dataclass
|
| 4 |
|
soundimage/utils/image_processor.py
CHANGED
|
@@ -29,7 +29,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
|
|
| 29 |
|
| 30 |
|
| 31 |
def load_fixed_mask(resolution: int) -> torch.Tensor:
|
| 32 |
-
mask_image = cv2.imread("
|
| 33 |
mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
|
| 34 |
mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
|
| 35 |
mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
def load_fixed_mask(resolution: int) -> torch.Tensor:
|
| 32 |
+
mask_image = cv2.imread("soundimage/utils/mask.png")
|
| 33 |
mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
|
| 34 |
mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
|
| 35 |
mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")
|
soundimage/utils/mask.png
ADDED
|
Git LFS Details
|