ffy2000 committed on
Commit
8a696da
·
1 Parent(s): 2c3399c

update lance_gradio

Browse files
app.py CHANGED
@@ -150,7 +150,7 @@ IMAGE_ASPECT_RATIO_TO_SIZE = {
150
  }
151
  DEFAULT_GPUS = "0"
152
  DEFAULT_QUEUE_SIZE = 32
153
- DEFAULT_CONCURRENCY_LIMIT = 2
154
  USE_KVCACHE = True
155
  TEXT_TEMPLATE = True
156
  RECORD_WRITE_LOCK = threading.Lock()
@@ -2677,7 +2677,7 @@ class LanceT2VV2TPipeline:
2677
  if inference_args.visual_gen:
2678
  stage_start = time.perf_counter()
2679
  print(f"[startup][gpu:{self.device}] Initializing VAE", flush=True)
2680
- vae_model = WanVideoVAE()
2681
  vae_config = deepcopy(vae_model.vae_config)
2682
  self._log_stage("VAE init", stage_start)
2683
  else:
@@ -3196,7 +3196,6 @@ class PipelinePool:
3196
  self.release(pipeline)
3197
 
3198
 
3199
- PIPELINE_POOLS: dict[str, PipelinePool] = {}
3200
  ACTIVE_PIPELINE_POOL: Optional[PipelinePool] = None
3201
  ACTIVE_POOL_LOCK = threading.Lock()
3202
  QUEUE_MAX_SIZE = DEFAULT_QUEUE_SIZE
@@ -3268,44 +3267,44 @@ ZERO_GPU_RUN_TASK_DURATION_SECONDS = get_zerogpu_duration_cap()
3268
  def is_pipeline_pool_ready_for_variant(model_variant: str) -> bool:
3269
  normalized_variant = normalize_model_variant(model_variant)
3270
  with ACTIVE_POOL_LOCK:
3271
- pool = PIPELINE_POOLS.get(normalized_variant)
3272
- return bool(pool is not None and pool.is_initialized)
 
 
 
3273
 
3274
 
3275
  def is_pipeline_pool_ready_for_task(task: str) -> bool:
3276
  return is_pipeline_pool_ready_for_variant(get_task_model_variant(task))
3277
 
3278
 
3279
- def get_or_create_pipeline_pool(model_variant: str) -> PipelinePool:
 
3280
  if not torch.cuda.is_available():
3281
  raise RuntimeError(
3282
  "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
3283
  "until GPU hardware is attached."
3284
  )
3285
- normalized_variant = normalize_model_variant(model_variant)
3286
  gpu_ids = parse_gpu_ids(os.getenv("LANCE_GPUS", DEFAULT_GPUS))
3287
  with ACTIVE_POOL_LOCK:
3288
- pool = PIPELINE_POOLS.get(normalized_variant)
3289
- if pool is None:
3290
- pool = PipelinePool(gpu_ids, model_variant=normalized_variant)
3291
- PIPELINE_POOLS[normalized_variant] = pool
3292
- return pool
3293
-
3294
-
3295
- def ensure_pipeline_pool_ready(model_variant: str) -> PipelinePool:
3296
- pool = get_or_create_pipeline_pool(model_variant)
3297
- if not pool.is_initialized:
3298
- pool.initialize_all()
3299
- return pool
3300
 
 
 
 
 
 
 
 
 
3301
 
3302
- def get_pipeline_pool(task: str) -> PipelinePool:
3303
- global ACTIVE_PIPELINE_POOL
3304
- model_variant = get_task_model_variant(task)
3305
- pool = ensure_pipeline_pool_ready(model_variant)
3306
- with ACTIVE_POOL_LOCK:
3307
- ACTIVE_PIPELINE_POOL = pool
3308
- return pool
3309
 
3310
 
3311
  def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
@@ -3575,18 +3574,15 @@ def build_status_markdown() -> str:
3575
  gpu_text = "unknown"
3576
  pipeline_slots = 0
3577
  active_variant = "none"
3578
- cached_variants = "none"
3579
- if ACTIVE_PIPELINE_POOL is not None:
3580
- active_variant = ACTIVE_PIPELINE_POOL.model_variant
3581
- gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
3582
- pipeline_slots = ACTIVE_PIPELINE_POOL.size
3583
  with ACTIVE_POOL_LOCK:
3584
- if PIPELINE_POOLS:
3585
- cached_variants = ",".join(sorted(PIPELINE_POOLS.keys()))
 
 
3586
  return (
3587
  f"**Status** GPU: `{gpu_text}` | Queue concurrency: `{QUEUE_CONCURRENCY_LIMIT}` | "
3588
  f"Pipeline slots: `{pipeline_slots}` | Queue limit: `{QUEUE_MAX_SIZE}` | "
3589
- f"Active model: `{active_variant}` | Cached variants: `{cached_variants}`"
3590
  )
3591
 
3592
 
 
150
  }
151
  DEFAULT_GPUS = "0"
152
  DEFAULT_QUEUE_SIZE = 32
153
+ DEFAULT_CONCURRENCY_LIMIT = 1
154
  USE_KVCACHE = True
155
  TEXT_TEMPLATE = True
156
  RECORD_WRITE_LOCK = threading.Lock()
 
2677
  if inference_args.visual_gen:
2678
  stage_start = time.perf_counter()
2679
  print(f"[startup][gpu:{self.device}] Initializing VAE", flush=True)
2680
+ vae_model = WanVideoVAE(device=torch.device("cuda", self.device))
2681
  vae_config = deepcopy(vae_model.vae_config)
2682
  self._log_stage("VAE init", stage_start)
2683
  else:
 
3196
  self.release(pipeline)
3197
 
3198
 
 
3199
  ACTIVE_PIPELINE_POOL: Optional[PipelinePool] = None
3200
  ACTIVE_POOL_LOCK = threading.Lock()
3201
  QUEUE_MAX_SIZE = DEFAULT_QUEUE_SIZE
 
3267
  def is_pipeline_pool_ready_for_variant(model_variant: str) -> bool:
3268
  normalized_variant = normalize_model_variant(model_variant)
3269
  with ACTIVE_POOL_LOCK:
3270
+ return bool(
3271
+ ACTIVE_PIPELINE_POOL is not None
3272
+ and ACTIVE_PIPELINE_POOL.model_variant == normalized_variant
3273
+ and ACTIVE_PIPELINE_POOL.is_initialized
3274
+ )
3275
 
3276
 
3277
  def is_pipeline_pool_ready_for_task(task: str) -> bool:
3278
  return is_pipeline_pool_ready_for_variant(get_task_model_variant(task))
3279
 
3280
 
3281
+ def get_pipeline_pool(task: str) -> PipelinePool:
3282
+ global ACTIVE_PIPELINE_POOL
3283
  if not torch.cuda.is_available():
3284
  raise RuntimeError(
3285
  "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
3286
  "until GPU hardware is attached."
3287
  )
3288
+ model_variant = get_task_model_variant(task)
3289
  gpu_ids = parse_gpu_ids(os.getenv("LANCE_GPUS", DEFAULT_GPUS))
3290
  with ACTIVE_POOL_LOCK:
3291
+ if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
3292
+ if not ACTIVE_PIPELINE_POOL.is_initialized:
3293
+ ACTIVE_PIPELINE_POOL.initialize_all()
3294
+ return ACTIVE_PIPELINE_POOL
 
 
 
 
 
 
 
 
3295
 
3296
+ if ACTIVE_PIPELINE_POOL is not None:
3297
+ previous_variant = ACTIVE_PIPELINE_POOL.model_variant
3298
+ print(
3299
+ f"[runtime] Switching Lance model from {previous_variant} to {model_variant}.",
3300
+ flush=True,
3301
+ )
3302
+ ACTIVE_PIPELINE_POOL.unload_all()
3303
+ ACTIVE_PIPELINE_POOL = None
3304
 
3305
+ ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=model_variant)
3306
+ ACTIVE_PIPELINE_POOL.initialize_all()
3307
+ return ACTIVE_PIPELINE_POOL
 
 
 
 
3308
 
3309
 
3310
  def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
 
3574
  gpu_text = "unknown"
3575
  pipeline_slots = 0
3576
  active_variant = "none"
 
 
 
 
 
3577
  with ACTIVE_POOL_LOCK:
3578
+ if ACTIVE_PIPELINE_POOL is not None:
3579
+ active_variant = ACTIVE_PIPELINE_POOL.model_variant
3580
+ gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
3581
+ pipeline_slots = ACTIVE_PIPELINE_POOL.size
3582
  return (
3583
  f"**Status** GPU: `{gpu_text}` | Queue concurrency: `{QUEUE_CONCURRENCY_LIMIT}` | "
3584
  f"Pipeline slots: `{pipeline_slots}` | Queue limit: `{QUEUE_MAX_SIZE}` | "
3585
+ f"Active model: `{active_variant}`"
3586
  )
3587
 
3588
 
data/datasets_custom/validation_dataset.py CHANGED
@@ -242,10 +242,15 @@ class ValidationDataset(Dataset):
242
  video_reader = VideoReader(video_stream, ctx=decord.cpu(worker_id % self.cpu_count))
243
  total_frames = len(video_reader)
244
 
 
 
 
 
 
245
  frames_info = {
246
- "clip_indices": [(0, total_frames)],
247
- "fps": 24,
248
- }
249
 
250
  frames_sampler_output: FrameSamplerOutput = self.frame_sampler(frames_info)
251
  video_frames = self._read_decord(video_reader, frames_sampler_output.indices)
 
242
  video_reader = VideoReader(video_stream, ctx=decord.cpu(worker_id % self.cpu_count))
243
  total_frames = len(video_reader)
244
 
245
+ try:
246
+ fps = int(round(float(video_reader.get_avg_fps())))
247
+ except Exception:
248
+ fps = 24
249
+
250
  frames_info = {
251
+ "clip_indices": [(0, total_frames)],
252
+ "fps": fps,
253
+ }
254
 
255
  frames_sampler_output: FrameSamplerOutput = self.frame_sampler(frames_info)
256
  video_frames = self._read_decord(video_reader, frames_sampler_output.indices)
lance_gradio/recommended_outputs/t2v/000000__sig-c2df7ef33f058e4e82a7.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9f1df5138213c7cd6d01dbe0e76588d4198aeedb7ddc7674666f0c2326ecaa
3
+ size 1162448
lance_gradio/recommended_outputs/t2v/000001__sig-ac3597a59fd4b21b74af.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:396b2e4e50c35e103a061ba195976cf97fded516624b725e6f3c82302b1325f1
3
+ size 1370505
lance_gradio/recommended_outputs/t2v/000002__sig-b61630761c595abe4d47.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eab3b95d86927c601f26361f4cbe52a2b153454af97ab28a3e76b64e98fa9c9
3
+ size 872768
lance_gradio/recommended_outputs/t2v/000004__sig-19a35c3c77bc2ac6ebe0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b2de3a7ea77dde9dfc300c607e4d7895ca3bd8ca9e013c32cd80c27831c740
3
+ size 377878
lance_gradio/recommended_outputs/t2v/000005__sig-edfae9c14d5b87228251.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6569439ec2dfd2490d78f5c41a3daa43e6d9d0ed1d754adb4b10205e9bb8d3
3
+ size 1015510
lance_gradio/recommended_outputs/t2v/000007__sig-0287f57acc7e0a19455a.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da4d9faafe6aeaa4d8f7f219d29693aeb91de1b76eb61ada26a23eb09409eebb
3
+ size 428967
lance_gradio/recommended_outputs/t2v/000008__sig-b80fdb9a8b3e5eaf18c2.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9ed73ecbc68be835272cd669703a5cb006add403eeca3f9595cbd4d0a1a563
3
+ size 658964
lance_gradio/recommended_outputs/video_edit/video_edit_example_000000__sig-8a88ccb08f4268c9a022.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8f02b8dfc45f79a23d8723c6b199f809870992ae96bf0d1373c102b5ad93ba
3
+ size 355592
lance_gradio/recommended_outputs/video_edit/video_edit_example_000001__sig-d8f24547f0161c9a0cb8.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:029a8a376ac32b1666ffcb2b4f4d8e460bccdad274081328d6e7dced2d414a9a
3
+ size 164643
lance_gradio/recommended_outputs/video_edit/video_edit_example_000002__sig-8001f001fa5822d8b2c4.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab6f3f83c2ba46cea5fc85606cc8c81a8ca6e0cf37fe6758d823b588621969d
3
+ size 428036
lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000000__sig-f771bb1b029b935df571.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ In this video, we see a butterfly and a bee interacting with each other and a flower. The butterfly is resting on the flower, and the bee is flying around it. Both the butterfly and the bee are part of the process of pollination. The butterfly consumes nectar from the flower, while the bee gathers pollen. This interaction is a clear display of the pollination process, with the butterfly and bee working in tandem to obtain food. The video also shows the beautiful details of the butterfly's wings and the bee's body structure as they move around the flower.
lance_gradio/recommended_outputs/x2t_video/x2t_video_example_000001__sig-54a7f19e0169fbfed57b.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ In the video, a woman is cooking in a kitchen. She is using a large metal bowl to mix ingredients. There are some red onions and red peppers on the table beside her. She is adding some tomato puree to the bowl and stirring it with a spoon. The camera focuses on her hands as she mixes the ingredients, and weaves the tomato puree into the mixture. The onions and red peppers seem to be part of the seasoning or garnishing of the dish. The whole process of mixing the ingredients is shown in a simple and clear way, making the audience feel the warmth and comfort of home-cooked food.
modeling/vae/wan/model.py CHANGED
@@ -43,6 +43,7 @@ class WanVideoVAE(object):
43
  self.logger = self.__class__.__logger__
44
 
45
  self.dtype = kwargs.get("dtype", torch.bfloat16)
 
46
  self.configure_vae_model()
47
  self.use_sample = kwargs.get("use_sample", True)
48
 
@@ -56,7 +57,7 @@ class WanVideoVAE(object):
56
  )
57
 
58
  def configure_vae_model(self):
59
- device = get_device()
60
 
61
  # 从 path_default.yaml 读取 VAE 路径
62
  try:
@@ -72,7 +73,7 @@ class WanVideoVAE(object):
72
 
73
  @torch.no_grad()
74
  def vae_encode(self, samples: List[Tensor], **kwargs) -> List[Tensor]:
75
- device = get_device()
76
 
77
  latents = []
78
  with torch.autocast(device_type="cuda", dtype=self.dtype):
@@ -92,7 +93,7 @@ class WanVideoVAE(object):
92
 
93
  @torch.no_grad()
94
  def vae_decode(self, latents: List[Tensor], **kwargs) -> List[Tensor]:
95
- device = get_device()
96
 
97
  samples = []
98
  with torch.autocast(device_type="cuda", dtype=self.dtype):
 
43
  self.logger = self.__class__.__logger__
44
 
45
  self.dtype = kwargs.get("dtype", torch.bfloat16)
46
+ self.device = torch.device(kwargs.get("device", get_device()))
47
  self.configure_vae_model()
48
  self.use_sample = kwargs.get("use_sample", True)
49
 
 
57
  )
58
 
59
  def configure_vae_model(self):
60
+ device = self.device
61
 
62
  # 从 path_default.yaml 读取 VAE 路径
63
  try:
 
73
 
74
  @torch.no_grad()
75
  def vae_encode(self, samples: List[Tensor], **kwargs) -> List[Tensor]:
76
+ device = self.device
77
 
78
  latents = []
79
  with torch.autocast(device_type="cuda", dtype=self.dtype):
 
93
 
94
  @torch.no_grad()
95
  def vae_decode(self, latents: List[Tensor], **kwargs) -> List[Tensor]:
96
+ device = self.device
97
 
98
  samples = []
99
  with torch.autocast(device_type="cuda", dtype=self.dtype):