This paper reports on the development of a novel style-guided diffusion model (SGDiff) which overcomes certain weaknesses inherent in existing models for image synthesis. The proposed SGDiff combines image modality with a pretrained text-to-image diffusion model to facilitate creative fashion image synthesis. It addresses the limitations of text-to-image diffusion models by incorporating supplementary style guidance, substantially reducing training costs, and overcoming the difficulties of controlling synthesized styles with text-only inputs. This paper also introduces a new dataset, SG-Fashion, which is specifically designed for fashion image synthesis applications and offers high-resolution images and an extensive range of garment categories. By means of a comprehensive ablation study, we examine the application of classifier-free guidance to a variety of conditions and validate the effectiveness of the proposed model for generating fashion images of the desired categories, product attributes, and styles. The contributions of this paper include a novel classifier-free guidance method for multi-modal feature fusion, a comprehensive dataset for fashion image synthesis applications, a thorough investigation of conditioned text-to-image synthesis, and valuable insights for future research in the text-to-image synthesis domain. The code and dataset will be released once the paper is published.
@comment{
  Conference paper in the ACM Multimedia 2023 proceedings (series MM '23).
  The address field holds the publisher's city; the location field holds the
  conference venue. The citation key is the DOI and is kept unchanged so that
  existing citations continue to resolve.
}
@inproceedings{10.1145/3581783.3613806,
  author    = {Sun, Zhengwentai and Zhou, Yanghong and He, Honghong and Mok, P. Y.},
  title     = {{SGDiff}: A Style-Guided Diffusion Model for Fashion Synthesis},
  booktitle = {Proceedings of the 31st {ACM} International Conference on Multimedia},
  series    = {MM '23},
  year      = {2023},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Ottawa, ON, Canada},
  numpages  = {10},
  isbn      = {979-8-4007-0108-5},
  doi       = {10.1145/3581783.3613806},
  url       = {https://doi.org/10.1145/3581783.3613806},
  keywords  = {fashion synthesis, style guidance, text-to-image, denoising diffusion probabilistic models},
}