Skip to content

[tests] Add inference test slices for SD3 and remove unnecessary tests #12106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -124,37 +124,22 @@ def get_dummy_inputs(self, device, seed=0):
}
return inputs

def test_stable_diffusion_3_different_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]

inputs = self.get_dummy_inputs(torch_device)
inputs["prompt_2"] = "a different prompt"
inputs["prompt_3"] = "another different prompt"
output_different_prompts = pipe(**inputs).images[0]

max_diff = np.abs(output_same_prompt - output_different_prompts).max()

# Outputs should be different here
assert max_diff > 1e-2
Comment on lines -127 to -141
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to remove this, since the test does not assert against any expected output slice. We don't get many issues about it, either.


def test_stable_diffusion_3_different_negative_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]
def test_inference(self):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)

inputs = self.get_dummy_inputs(torch_device)
inputs["negative_prompt_2"] = "deformed"
inputs["negative_prompt_3"] = "blurry"
output_different_prompts = pipe(**inputs).images[0]
image = pipe(**inputs).images[0]
generated_slice = image.flatten()
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

max_diff = np.abs(output_same_prompt - output_different_prompts).max()
# fmt: off
expected_slice = np.array([0.5112, 0.5228, 0.5235, 0.5524, 0.3188, 0.5017, 0.5574, 0.4899, 0.6812, 0.5991, 0.3908, 0.5213, 0.5582, 0.4457, 0.4204, 0.5616])
# fmt: on

# Outputs should be different here
assert max_diff > 1e-2
self.assertTrue(
np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
)

def test_fused_qkv_projections(self):
device = "cpu" # ensure determinism for the device-dependent torch.Generator
Expand Down Expand Up @@ -268,40 +253,9 @@ def test_sd3_inference(self):

image = pipe(**inputs).images[0]
image_slice = image[0, :10, :10]
expected_slice = np.array(
[
0.4648,
0.4404,
0.4177,
0.5063,
0.4800,
0.4287,
0.5425,
0.5190,
0.4717,
0.5430,
0.5195,
0.4766,
0.5361,
0.5122,
0.4612,
0.4871,
0.4749,
0.4058,
0.4756,
0.4678,
0.3804,
0.4832,
0.4822,
0.3799,
0.5103,
0.5034,
0.3953,
0.5073,
0.4839,
0.3884,
]
)
# fmt: off
expected_slice = np.array([0.4648, 0.4404, 0.4177, 0.5063, 0.4800, 0.4287, 0.5425, 0.5190, 0.4717, 0.5430, 0.5195, 0.4766, 0.5361, 0.5122, 0.4612, 0.4871, 0.4749, 0.4058, 0.4756, 0.4678, 0.3804, 0.4832, 0.4822, 0.3799, 0.5103, 0.5034, 0.3953, 0.5073, 0.4839, 0.3884])
# fmt: on

max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,37 +128,22 @@ def get_dummy_inputs(self, device, seed=0):
}
return inputs

def test_stable_diffusion_3_img2img_different_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
def test_inference(self):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]

inputs = self.get_dummy_inputs(torch_device)
inputs["prompt_2"] = "a different prompt"
inputs["prompt_3"] = "another different prompt"
output_different_prompts = pipe(**inputs).images[0]

max_diff = np.abs(output_same_prompt - output_different_prompts).max()

# Outputs should be different here
assert max_diff > 1e-2

def test_stable_diffusion_3_img2img_different_negative_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]

inputs = self.get_dummy_inputs(torch_device)
inputs["negative_prompt_2"] = "deformed"
inputs["negative_prompt_3"] = "blurry"
output_different_prompts = pipe(**inputs).images[0]
image = pipe(**inputs).images[0]
generated_slice = image.flatten()
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

max_diff = np.abs(output_same_prompt - output_different_prompts).max()
# fmt: off
expected_slice = np.array([0.4564, 0.5486, 0.4868, 0.5923, 0.3775, 0.5543, 0.4807, 0.4177, 0.3778, 0.5957, 0.5726, 0.4333, 0.6312, 0.5062, 0.4838, 0.5984])
# fmt: on

# Outputs should be different here
assert max_diff > 1e-2
self.assertTrue(
np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
)

@unittest.skip("Skip for now.")
def test_multi_vae(self):
Expand Down Expand Up @@ -207,112 +192,16 @@ def test_sd3_img2img_inference(self):
inputs = self.get_inputs(torch_device)
image = pipe(**inputs).images[0]
image_slice = image[0, :10, :10]

# fmt: off
expected_slices = Expectations(
{
("xpu", 3): np.array(
[
0.5117,
0.4421,
0.3852,
0.5044,
0.4219,
0.3262,
0.5024,
0.4329,
0.3276,
0.4978,
0.4412,
0.3355,
0.4983,
0.4338,
0.3279,
0.4893,
0.4241,
0.3129,
0.4875,
0.4253,
0.3030,
0.4961,
0.4267,
0.2988,
0.5029,
0.4255,
0.3054,
0.5132,
0.4248,
0.3222,
]
),
("cuda", 7): np.array(
[
0.5435,
0.4673,
0.5732,
0.4438,
0.3557,
0.4912,
0.4331,
0.3491,
0.4915,
0.4287,
0.347,
0.4849,
0.4355,
0.3469,
0.4871,
0.4431,
0.3538,
0.4912,
0.4521,
0.3643,
0.5059,
0.4587,
0.373,
0.5166,
0.4685,
0.3845,
0.5264,
0.4746,
0.3914,
0.5342,
]
),
("cuda", 8): np.array(
[
0.5146,
0.4385,
0.3826,
0.5098,
0.4150,
0.3218,
0.5142,
0.4312,
0.3298,
0.5127,
0.4431,
0.3411,
0.5171,
0.4424,
0.3374,
0.5088,
0.4348,
0.3242,
0.5073,
0.4380,
0.3174,
0.5132,
0.4397,
0.3115,
0.5132,
0.4343,
0.3118,
0.5219,
0.4328,
0.3256,
]
),
("xpu", 3): np.array([0.5117, 0.4421, 0.3852, 0.5044, 0.4219, 0.3262, 0.5024, 0.4329, 0.3276, 0.4978, 0.4412, 0.3355, 0.4983, 0.4338, 0.3279, 0.4893, 0.4241, 0.3129, 0.4875, 0.4253, 0.3030, 0.4961, 0.4267, 0.2988, 0.5029, 0.4255, 0.3054, 0.5132, 0.4248, 0.3222]),
("cuda", 7): np.array([0.5435, 0.4673, 0.5732, 0.4438, 0.3557, 0.4912, 0.4331, 0.3491, 0.4915, 0.4287, 0.347, 0.4849, 0.4355, 0.3469, 0.4871, 0.4431, 0.3538, 0.4912, 0.4521, 0.3643, 0.5059, 0.4587, 0.373, 0.5166, 0.4685, 0.3845, 0.5264, 0.4746, 0.3914, 0.5342]),
("cuda", 8): np.array([0.5146, 0.4385, 0.3826, 0.5098, 0.4150, 0.3218, 0.5142, 0.4312, 0.3298, 0.5127, 0.4431, 0.3411, 0.5171, 0.4424, 0.3374, 0.5088, 0.4348, 0.3242, 0.5073, 0.4380, 0.3174, 0.5132, 0.4397, 0.3115, 0.5132, 0.4343, 0.3118, 0.5219, 0.4328, 0.3256]),
}
)
# fmt: on

expected_slice = expected_slices.get_expectation()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,37 +132,23 @@ def get_dummy_inputs(self, device, seed=0):
}
return inputs

def test_stable_diffusion_3_inpaint_different_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
def test_inference(self):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]
image = pipe(**inputs).images[0]
generated_slice = image.flatten()
generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

inputs = self.get_dummy_inputs(torch_device)
inputs["prompt_2"] = "a different prompt"
inputs["prompt_3"] = "another different prompt"
output_different_prompts = pipe(**inputs).images[0]

max_diff = np.abs(output_same_prompt - output_different_prompts).max()

# Outputs should be different here
assert max_diff > 1e-2

def test_stable_diffusion_3_inpaint_different_negative_prompts(self):
pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)

inputs = self.get_dummy_inputs(torch_device)
output_same_prompt = pipe(**inputs).images[0]

inputs = self.get_dummy_inputs(torch_device)
inputs["negative_prompt_2"] = "deformed"
inputs["negative_prompt_3"] = "blurry"
output_different_prompts = pipe(**inputs).images[0]
# fmt: off
expected_slice = np.array([0.5035, 0.6661, 0.5859, 0.413, 0.4224, 0.4234, 0.7181, 0.5062, 0.5183, 0.6877, 0.5074, 0.585, 0.6111, 0.5422, 0.5306, 0.5891])
# fmt: on

max_diff = np.abs(output_same_prompt - output_different_prompts).max()

# Outputs should be different here
assert max_diff > 1e-2
self.assertTrue(
np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
)

@unittest.skip("Skip for now.")
def test_multi_vae(self):
pass
Loading