Spaces:

bytedance-research
/

Lance

Running on Zero

App Files Community

ffy2000 committed on May 27

Commit

8a696da

1 Parent(s): 2c3399c

update lance_gradio

Browse files

Files changed (15) hide show

app.py +30 -34
data/datasets_custom/validation_dataset.py +8 -3
lance_gradio/recommended_outputs/t2v/000000__sig-c2df7ef33f058e4e82a7.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000001__sig-ac3597a59fd4b21b74af.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000002__sig-b61630761c595abe4d47.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000004__sig-19a35c3c77bc2ac6ebe0.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000005__sig-edfae9c14d5b87228251.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000007__sig-0287f57acc7e0a19455a.mp4 +3 -0
lance_gradio/recommended_outputs/t2v/000008__sig-b80fdb9a8b3e5eaf18c2.mp4 +3 -0
lance_gradio/recommended_outputs/video_edit/video_edit_example_000000__sig-8a88ccb08f4268c9a022.mp4 +3 -0
lance_gradio/recommended_outputs/video_edit/video_edit_example_000001__sig-d8f24547f0161c9a0cb8.mp4 +3 -0
lance_gradio/recommended_outputs/video_edit/video_edit_example_000002__sig-8001f001fa5822d8b2c4.mp4 +3 -0
lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000000__sig-f771bb1b029b935df571.txt +1 -0
lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000001__sig-54a7f19e0169fbfed57b.txt +1 -0
modeling/vae/wan/model.py +4 -3

app.py CHANGED Viewed

@@ -150,7 +150,7 @@ IMAGE_ASPECT_RATIO_TO_SIZE = {
 }
 DEFAULT_GPUS = "0"
 DEFAULT_QUEUE_SIZE = 32
-DEFAULT_CONCURRENCY_LIMIT = 2
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
@@ -2677,7 +2677,7 @@ class LanceT2VV2TPipeline:
             if inference_args.visual_gen:
                 stage_start = time.perf_counter()
                 print(f"[startup][gpu:{self.device}] Initializing VAE", flush=True)
-                vae_model = WanVideoVAE()
                 vae_config = deepcopy(vae_model.vae_config)
                 self._log_stage("VAE init", stage_start)
             else:
@@ -3196,7 +3196,6 @@ class PipelinePool:
             self.release(pipeline)
-PIPELINE_POOLS: dict[str, PipelinePool] = {}
 ACTIVE_PIPELINE_POOL: Optional[PipelinePool] = None
 ACTIVE_POOL_LOCK = threading.Lock()
 QUEUE_MAX_SIZE = DEFAULT_QUEUE_SIZE
@@ -3268,44 +3267,44 @@ ZERO_GPU_RUN_TASK_DURATION_SECONDS = get_zerogpu_duration_cap()
 def is_pipeline_pool_ready_for_variant(model_variant: str) -> bool:
     normalized_variant = normalize_model_variant(model_variant)
     with ACTIVE_POOL_LOCK:
-        pool = PIPELINE_POOLS.get(normalized_variant)
-        return bool(pool is not None and pool.is_initialized)
 def is_pipeline_pool_ready_for_task(task: str) -> bool:
     return is_pipeline_pool_ready_for_variant(get_task_model_variant(task))
-def get_or_create_pipeline_pool(model_variant: str) -> PipelinePool:
     if not torch.cuda.is_available():
         raise RuntimeError(
             "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
             "until GPU hardware is attached."
         )
-    normalized_variant = normalize_model_variant(model_variant)
     gpu_ids = parse_gpu_ids(os.getenv("LANCE_GPUS", DEFAULT_GPUS))
     with ACTIVE_POOL_LOCK:
-        pool = PIPELINE_POOLS.get(normalized_variant)
-        if pool is None:
-            pool = PipelinePool(gpu_ids, model_variant=normalized_variant)
-            PIPELINE_POOLS[normalized_variant] = pool
-        return pool
-def ensure_pipeline_pool_ready(model_variant: str) -> PipelinePool:
-    pool = get_or_create_pipeline_pool(model_variant)
-    if not pool.is_initialized:
-        pool.initialize_all()
-    return pool
-def get_pipeline_pool(task: str) -> PipelinePool:
-    global ACTIVE_PIPELINE_POOL
-    model_variant = get_task_model_variant(task)
-    pool = ensure_pipeline_pool_ready(model_variant)
-    with ACTIVE_POOL_LOCK:
-        ACTIVE_PIPELINE_POOL = pool
-    return pool
 def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
@@ -3575,18 +3574,15 @@ def build_status_markdown() -> str:
     gpu_text = "unknown"
     pipeline_slots = 0
     active_variant = "none"
-    cached_variants = "none"
-    if ACTIVE_PIPELINE_POOL is not None:
-        active_variant = ACTIVE_PIPELINE_POOL.model_variant
-        gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
-        pipeline_slots = ACTIVE_PIPELINE_POOL.size
     with ACTIVE_POOL_LOCK:
-        if PIPELINE_POOLS:
-            cached_variants = ",".join(sorted(PIPELINE_POOLS.keys()))
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Queue concurrency: `{QUEUE_CONCURRENCY_LIMIT}`  |  "
         f"Pipeline slots: `{pipeline_slots}`  |  Queue limit: `{QUEUE_MAX_SIZE}`  |  "
-        f"Active model: `{active_variant}`  |  Cached variants: `{cached_variants}`"
     )

 }
 DEFAULT_GPUS = "0"
 DEFAULT_QUEUE_SIZE = 32
+DEFAULT_CONCURRENCY_LIMIT = 1
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
             if inference_args.visual_gen:
                 stage_start = time.perf_counter()
                 print(f"[startup][gpu:{self.device}] Initializing VAE", flush=True)
+                vae_model = WanVideoVAE(device=torch.device("cuda", self.device))
                 vae_config = deepcopy(vae_model.vae_config)
                 self._log_stage("VAE init", stage_start)
             else:
             self.release(pipeline)
 ACTIVE_PIPELINE_POOL: Optional[PipelinePool] = None
 ACTIVE_POOL_LOCK = threading.Lock()
 QUEUE_MAX_SIZE = DEFAULT_QUEUE_SIZE
 def is_pipeline_pool_ready_for_variant(model_variant: str) -> bool:
     normalized_variant = normalize_model_variant(model_variant)
     with ACTIVE_POOL_LOCK:
+        return bool(
+            ACTIVE_PIPELINE_POOL is not None
+            and ACTIVE_PIPELINE_POOL.model_variant == normalized_variant
+            and ACTIVE_PIPELINE_POOL.is_initialized
+        )
 def is_pipeline_pool_ready_for_task(task: str) -> bool:
     return is_pipeline_pool_ready_for_variant(get_task_model_variant(task))
+def get_pipeline_pool(task: str) -> PipelinePool:
+    global ACTIVE_PIPELINE_POOL
     if not torch.cuda.is_available():
         raise RuntimeError(
             "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
             "until GPU hardware is attached."
         )
+    model_variant = get_task_model_variant(task)
     gpu_ids = parse_gpu_ids(os.getenv("LANCE_GPUS", DEFAULT_GPUS))
     with ACTIVE_POOL_LOCK:
+        if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
+            if not ACTIVE_PIPELINE_POOL.is_initialized:
+                ACTIVE_PIPELINE_POOL.initialize_all()
+            return ACTIVE_PIPELINE_POOL
+        if ACTIVE_PIPELINE_POOL is not None:
+            previous_variant = ACTIVE_PIPELINE_POOL.model_variant
+            print(
+                f"[runtime] Switching Lance model from {previous_variant} to {model_variant}.",
+                flush=True,
+            )
+            ACTIVE_PIPELINE_POOL.unload_all()
+            ACTIVE_PIPELINE_POOL = None
+        ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=model_variant)
+        ACTIVE_PIPELINE_POOL.initialize_all()
+        return ACTIVE_PIPELINE_POOL
 def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
     gpu_text = "unknown"
     pipeline_slots = 0
     active_variant = "none"
     with ACTIVE_POOL_LOCK:
+        if ACTIVE_PIPELINE_POOL is not None:
+            active_variant = ACTIVE_PIPELINE_POOL.model_variant
+            gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
+            pipeline_slots = ACTIVE_PIPELINE_POOL.size
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Queue concurrency: `{QUEUE_CONCURRENCY_LIMIT}`  |  "
         f"Pipeline slots: `{pipeline_slots}`  |  Queue limit: `{QUEUE_MAX_SIZE}`  |  "
+        f"Active model: `{active_variant}`"
     )

data/datasets_custom/validation_dataset.py CHANGED Viewed

@@ -242,10 +242,15 @@ class ValidationDataset(Dataset):
             video_reader = VideoReader(video_stream, ctx=decord.cpu(worker_id % self.cpu_count))
             total_frames = len(video_reader)
             frames_info = {
-                "clip_indices": [(0, total_frames)],
-                "fps": 24,
-            }
             frames_sampler_output: FrameSamplerOutput = self.frame_sampler(frames_info)
             video_frames = self._read_decord(video_reader, frames_sampler_output.indices)

             video_reader = VideoReader(video_stream, ctx=decord.cpu(worker_id % self.cpu_count))
             total_frames = len(video_reader)
+            try:
+                fps = int(round(float(video_reader.get_avg_fps())))
+            except Exception:
+                fps = 24
             frames_info = {
+                    "clip_indices": [(0, total_frames)],
+                    "fps": fps,
+                }
             frames_sampler_output: FrameSamplerOutput = self.frame_sampler(frames_info)
             video_frames = self._read_decord(video_reader, frames_sampler_output.indices)

lance_gradio/recommended_outputs/t2v/000000__sig-c2df7ef33f058e4e82a7.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d9f1df5138213c7cd6d01dbe0e76588d4198aeedb7ddc7674666f0c2326ecaa
+size 1162448

lance_gradio/recommended_outputs/t2v/000001__sig-ac3597a59fd4b21b74af.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:396b2e4e50c35e103a061ba195976cf97fded516624b725e6f3c82302b1325f1
+size 1370505

lance_gradio/recommended_outputs/t2v/000002__sig-b61630761c595abe4d47.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6eab3b95d86927c601f26361f4cbe52a2b153454af97ab28a3e76b64e98fa9c9
+size 872768

lance_gradio/recommended_outputs/t2v/000004__sig-19a35c3c77bc2ac6ebe0.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17b2de3a7ea77dde9dfc300c607e4d7895ca3bd8ca9e013c32cd80c27831c740
+size 377878

lance_gradio/recommended_outputs/t2v/000005__sig-edfae9c14d5b87228251.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb6569439ec2dfd2490d78f5c41a3daa43e6d9d0ed1d754adb4b10205e9bb8d3
+size 1015510

lance_gradio/recommended_outputs/t2v/000007__sig-0287f57acc7e0a19455a.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da4d9faafe6aeaa4d8f7f219d29693aeb91de1b76eb61ada26a23eb09409eebb
+size 428967

lance_gradio/recommended_outputs/t2v/000008__sig-b80fdb9a8b3e5eaf18c2.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af9ed73ecbc68be835272cd669703a5cb006add403eeca3f9595cbd4d0a1a563
+size 658964

lance_gradio/recommended_outputs/video_edit/video_edit_example_000000__sig-8a88ccb08f4268c9a022.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf8f02b8dfc45f79a23d8723c6b199f809870992ae96bf0d1373c102b5ad93ba
+size 355592

lance_gradio/recommended_outputs/video_edit/video_edit_example_000001__sig-d8f24547f0161c9a0cb8.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:029a8a376ac32b1666ffcb2b4f4d8e460bccdad274081328d6e7dced2d414a9a
+size 164643

lance_gradio/recommended_outputs/video_edit/video_edit_example_000002__sig-8001f001fa5822d8b2c4.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ab6f3f83c2ba46cea5fc85606cc8c81a8ca6e0cf37fe6758d823b588621969d
+size 428036

lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000000__sig-f771bb1b029b935df571.txt ADDED Viewed

	@@ -0,0 +1 @@

+ In this video, we see a butterfly and a bee interacting with each other and a flower. The butterfly is resting on the flower, and the bee is flying around it. Both the butterfly and the bee are part of the process of pollination. The butterfly consumes nectar from the flower, while the bee gathers pollen. This interaction is a clear display of the pollination process, with the butterfly and bee working in tandem to obtain food. The video also shows the beautiful details of the butterfly's wings and the bee's body structure as they move around the flower.<|im_end|>

lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000001__sig-54a7f19e0169fbfed57b.txt ADDED Viewed

	@@ -0,0 +1 @@

+ In the video, a woman is cooking in a kitchen. She is using a large metal bowl to mix ingredients. There are some red onions and red peppers on the table beside her. She is adding some tomato puree to the bowl and stirring it with a spoon. The camera focuses on her hands as she mixes the ingredients, and weaves the tomato puree into the mixture. The onions and red peppers seem to be part of the seasoning or garnishing of the dish. The whole process of mixing the ingredients is shown in a simple and clear way, making the audience feel the warmth and comfort of home-cooked food.<|im_end|>

modeling/vae/wan/model.py CHANGED Viewed

@@ -43,6 +43,7 @@ class WanVideoVAE(object):
         self.logger = self.__class__.__logger__
         self.dtype = kwargs.get("dtype", torch.bfloat16)
         self.configure_vae_model()
         self.use_sample = kwargs.get("use_sample", True)
@@ -56,7 +57,7 @@ class WanVideoVAE(object):
         )
     def configure_vae_model(self):
-        device = get_device()
         # 从 path_default.yaml 读取 VAE 路径
         try:
@@ -72,7 +73,7 @@ class WanVideoVAE(object):
     @torch.no_grad()
     def vae_encode(self, samples: List[Tensor], **kwargs) -> List[Tensor]:
-        device = get_device()
         latents = []
         with torch.autocast(device_type="cuda", dtype=self.dtype):
@@ -92,7 +93,7 @@ class WanVideoVAE(object):
     @torch.no_grad()
     def vae_decode(self, latents: List[Tensor], **kwargs) -> List[Tensor]:
-        device = get_device()
         samples = []
         with torch.autocast(device_type="cuda", dtype=self.dtype):

         self.logger = self.__class__.__logger__
         self.dtype = kwargs.get("dtype", torch.bfloat16)
+        self.device = torch.device(kwargs.get("device", get_device()))
         self.configure_vae_model()
         self.use_sample = kwargs.get("use_sample", True)
         )
     def configure_vae_model(self):
+        device = self.device
         # 从 path_default.yaml 读取 VAE 路径
         try:
     @torch.no_grad()
     def vae_encode(self, samples: List[Tensor], **kwargs) -> List[Tensor]:
+        device = self.device
         latents = []
         with torch.autocast(device_type="cuda", dtype=self.dtype):
     @torch.no_grad()
     def vae_decode(self, latents: List[Tensor], **kwargs) -> List[Tensor]:
+        device = self.device
         samples = []
         with torch.autocast(device_type="cuda", dtype=self.dtype):