Spaces:

Hyathi
/

SoundImage-LipSync

Paused

samarth-ht committed on Jan 22

Commit

ac802d5

1 Parent(s): cd8c6a9

changes to work

Files changed (6) hide show

.gitignore CHANGED Viewed

@@ -40,6 +40,6 @@ wandb/
 *.jpeg
 *.csv
-!/latentsync/utils/mask.png
 /checkpoints/
 !/assets/*

 *.jpeg
 *.csv
+!/soundimage/utils/mask.png
 /checkpoints/
 !/assets/*

configs/unet/first_stage.yaml CHANGED Viewed

@@ -83,9 +83,7 @@ model:
   unet_use_cross_frame_attention: false
   unet_use_temporal_attention: false
-  # Actually we don't use the motion module in the final version of LatentSync
-  # When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
-  # We decied to leave the code here for possible future usage
   use_motion_module: false
   motion_module_resolutions: [1, 2, 4, 8]
   motion_module_mid_block: false

   unet_use_cross_frame_attention: false
   unet_use_temporal_attention: false
   use_motion_module: false
   motion_module_resolutions: [1, 2, 4, 8]
   motion_module_mid_block: false

configs/unet/second_stage.yaml CHANGED Viewed

@@ -83,9 +83,7 @@ model:
   unet_use_cross_frame_attention: false
   unet_use_temporal_attention: false
-  # Actually we don't use the motion module in the final version of LatentSync
-  # When we started the project, we used the codebase of AnimateDiff and tried motion module, the results are poor
-  # We decied to leave the code here for possible future usage
   use_motion_module: false
   motion_module_resolutions: [1, 2, 4, 8]
   motion_module_mid_block: false

   unet_use_cross_frame_attention: false
   unet_use_temporal_attention: false
   use_motion_module: false
   motion_module_resolutions: [1, 2, 4, 8]
   motion_module_mid_block: false

soundimage/models/motion_module.py CHANGED Viewed

@@ -1,8 +1,4 @@
-# Adapted from https://github.com/guoyww/AnimateDiff/blob/main/animatediff/models/motion_module.py
-# Actually we don't use the motion module in the final version of LatentSync
-# When we started the project, we used the codebase of AnimateDiff and tried motion module
-# But the results are poor, and we decied to leave the code here for possible future usage
 from dataclasses import dataclass



1



2
3	from dataclasses import dataclass
4

soundimage/utils/image_processor.py CHANGED Viewed

@@ -29,7 +29,7 @@ https://stackoverflow.com/questions/23853632/which-kind-of-interpolation-best-fo
 def load_fixed_mask(resolution: int) -> torch.Tensor:
-    mask_image = cv2.imread("latentsync/utils/mask.png")
     mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
     mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
     mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")

 def load_fixed_mask(resolution: int) -> torch.Tensor:
+    mask_image = cv2.imread("soundimage/utils/mask.png")
     mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
     mask_image = cv2.resize(mask_image, (resolution, resolution), interpolation=cv2.INTER_AREA) / 255.0
     mask_image = rearrange(torch.from_numpy(mask_image), "h w c -> c h w")

soundimage/utils/mask.png ADDED Viewed