diff --git "a/\330\257\330\256\330\252\330\261 \330\242\330\264\331\276\330\262" "b/\330\257\330\256\330\252\330\261 \330\242\330\264\331\276\330\262"
new file mode 100644
index 0000000..efb9664
--- /dev/null
+++ "b/\330\257\330\256\330\252\330\261 \330\242\330\264\331\276\330\262"
@@ -0,0 +1,30 @@
+import torch
+from diffusers import AutoModel, SkyReelsV2DiffusionForcingPipeline, UniPCMultistepScheduler
+from diffusers.utils import export_to_video
+
+vae = AutoModel.from_pretrained("Skywork/SkyReels-V2-DF-14B-540P-Diffusers", subfolder="vae", torch_dtype=torch.float32)
+
+pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
+    "Skywork/SkyReels-V2-DF-14B-540P-Diffusers",
+    vae=vae,
+    torch_dtype=torch.bfloat16
+)
+flow_shift = 8.0  # 8.0 for T2V, 5.0 for I2V
+pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
+pipeline = pipeline.to("cuda")
+
+prompt = "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."
+
+output = pipeline(
+    prompt=prompt,
+    num_inference_steps=30,
+    height=544,  # 720 for 720P
+    width=960,  # 1280 for 720P
+    num_frames=97,
+    base_num_frames=97,  # 121 for 720P
+    ar_step=5,  # Controls asynchronous inference (0 for synchronous mode)
+    causal_block_size=5,  # Number of frames in each block for asynchronous processing
+    overlap_history=None,  # Number of frames to overlap for smooth transitions in long videos; 17 for long video generations
+    addnoise_condition=20,  # Improves consistency in long video generation
+).frames[0]
+export_to_video(output, "T2V.mp4", fps=24, quality=8)