Article(id=1149773879670960988, tenantId=1146029695717560320, journalId=1146123166801305609, issueId=1149773869357167407, articleNumber=null, orderNo=null, doi=10.12404/j.issn.1671-1815.2403342, pmid=null, cstr=null, oa=null, hot=null, price=null, onlineType=0, articleFormat=0, articleType=null, articleTypeStr=null, receivedDate=1715011200000, receivedDateStr=2024-05-07, revisedDate=1736438400000, revisedDateStr=2025-01-10, acceptedDate=null, acceptedDateStr=null, onlineDate=1752057054673, onlineDateStr=2025-07-09, pubDate=1746633600000, pubDateStr=2025-05-08, doiRegisterDate=null, doiRegisterDateStr=null, onlineIssueDate=1752057054673, onlineIssueDateStr=2025-07-09, onlineJustAcceptDate=null, onlineJustAcceptDateStr=null, onlineFirstDate=null, onlineFirstDateStr=null, sourceXml=null, magXml=null, createTime=1752057054673, creator=13701087609, updateTime=1752057054673, updator=13701087609, issue=Issue{id=1149773869357167407, tenantId=1146029695717560320, journalId=1146123166801305609, year='2025', volume='25', issue='13', pageStart='5273', pageEnd='5704', issueExtLink='null', onlineDate='null', pubDate='null', beforeIssueId=null, nextIssueId=null, price=null, status=1, issueComplete=1, articleOrder=1, issueType=-1, specialIssue=0, createTime=1752057052207, creator=13701087609, updateTime=1768456769392, updator=13701087609, preIssue=null, nextIssue=null, ext={EN=IssueExt(id=1218559268744253990, tenantId=1146029695717560320, journalId=1146123166801305609, issueId=1149773869357167407, language=EN, specialIssueTitle=, coverIllustrator=, specialIssueEditor=, specialIssueAbout=), CN=IssueExt(id=1218559268744253991, tenantId=1146029695717560320, journalId=1146123166801305609, issueId=1149773869357167407, language=CN, specialIssueTitle=, coverIllustrator=, specialIssueEditor=, specialIssueAbout=)}, issueFiles=null}, startPage=5515, endPage=5526, ext={EN=ArticleExt(id=1149773880031671138, articleId=1149773879670960988, tenantId=1146029695717560320, 
journalId=1146123166801305609, language=EN, title=Audio-based High-resolution Face Portrait Method, columnId=1156262729162810294, journalTitle=Science Technology and Engineering, columnName=Papers·Automation and Computational Technology, runingTitle=null, highlight=null, articleAbstract=
Existing voice-driven facial generation methods still face challenges in feature extraction and generation quality, and have yet to fully explore the deep correlation between audio and facial features. To address the above-mentioned issues, a research approach that combines Mel frequency cepstral coefficients (MFCC) for audio feature extraction with the image generation capabilities of the second generation of style generative adversarial networks (StyleGAN2) was proposed. In terms of audio processing, MFCC was employed as the feature extraction method. To more effectively extract and transmit features from the audio, a ResNet18-based residual module was designed and integrated with the squeeze-and-excitation (SE) attention mechanism. Additionally, the activation function in the original residual blocks was optimized and improved by using the Mish activation function, aiming to mitigate the gradient vanishing problem in deep networks, maintain the integrity of feature information, and enhance the accuracy and generalization ability of the model. The StyleGAN2 model was then utilized as the facial image generation model. Experimental results demonstrate that the integration of the designed audio processing network with the StyleGAN2 facial generation model exhibits outstanding performance in the task of voice-driven facial generation. Through comprehensive evaluation using metrics such as Fréchet inception distance (FID) and path length, the proposed method shows a significant improvement in generation quality compared to existing methods, thus fully proving its effectiveness and superiority.
, correspAuthors=Fan-liang BU, authorNote=null, correspAuthorsNote=null, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=null, magXml=null, pdfUrl=null, pdf=null, pdfFileSize=null, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=null, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=null, mapNumber=null, authorCompany=null, fund=null, authors=null, authorsList=Hao-ming QIN, Fan-liang BU, Fang-hao ZHONG), CN=ArticleExt(id=1149773920108245448, articleId=1149773879670960988, tenantId=1146029695717560320, journalId=1146123166801305609, language=CN, title=基于音频的高分辨率人脸画像方法, columnId=1156262729783567290, journalTitle=科学技术与工程, columnName=论文·自动化技术、计算机技术, runingTitle=null, highlight=null, articleAbstract=
现有的语音驱动人脸生成方法在特征提取与生成质量上仍面临挑战,且尚未充分挖掘音频与人脸特征之间的深层关联。为解决这些问题,提出一种结合梅尔频率倒谱系数(Mel frequency cepstral coefficients,MFCC)音频特征提取与第二代样式生成对抗网络(style generative adversarial networks 2,StyleGAN2)图像生成技术的研究方法。在音频处理方面,采用了梅尔频率倒谱系数作为特征提取方法。为了更有效地从音频中提取和传递特征,设计了一种基于ResNet18的残差模块,并融入了SE(squeeze-and-excitation)注意力机制。同时对原残差块中的激活函数进行了优化改进,采用Mish激活函数,旨在减少深层网络中的梯度消失问题,保持特征信息的完整性并提高模型的准确性和泛化能力。采取StyleGAN2模型作为人脸图像的生成模型。实验结果表明,结合了设计的音频处理网络和StyleGAN2的人脸生成模型,在语音驱动的人脸生成任务中展现出了卓越的性能。通过综合评估Fréchet起始距离(Fréchet inception distance,FID)和路径长度等指标,本文方法在语音驱动的人脸生成任务中相较于现有方法,在生成质量上有显著提升,充分证明了所提方法的有效性和优越性。
, correspAuthors=卜凡亮, authorNote=null, correspAuthorsNote=
, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=/5+2MHD/nqEgUS2xcFJfmA==, magXml=nPshX7Lw07zn8QIZbJ+pWw==, pdfUrl=null, pdf=j9tNOI6cKp7DrFAHXVEH2w==, pdfFileSize=null, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=null, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=61q+L3SuGeNRAJLNkSP36A==, mapNumber=null, authorCompany=null, fund=null, authors=
, authorsList=秦昊铭, 卜凡亮, 钟方昊)}, authors=[Author(id=1175114743917462525, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, orderNo=0, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=2022211483@stu.ppsuc.edu.cn, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1175114743980377087, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, authorId=1175114743917462525, language=EN, stringName=Hao-ming QIN, firstName=Hao-ming, middleName=null, lastName=QIN, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=null, address=School of Information Network Security, People's Public Security University of China, Beijing 100038, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1175114744043291648, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, authorId=1175114743917462525, language=CN, stringName=秦昊铭, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=null, address=中国人民公安大学信息网络安全学院, 北京 100038, bio={"content":"
秦昊铭(1998—),男,汉族,湖北宜昌人,硕士研究生。研究方向:计算机视觉、多模态学习。E-mail:2022211483@stu.ppsuc.edu.cn。
"}, bioImg=null, bioContent=
秦昊铭(1998—),男,汉族,湖北宜昌人,硕士研究生。研究方向:计算机视觉、多模态学习。E-mail:2022211483@stu.ppsuc.edu.cn。
, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1175114743850353657, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, xref=null, ext=[AuthorCompanyExt(id=1175114743858742266, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=School of Information Network Security, People's Public Security University of China, Beijing 100038, China), AuthorCompanyExt(id=1175114743862936571, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=中国人民公安大学信息网络安全学院, 北京 100038)])]), Author(id=1175114744110399489, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, orderNo=1, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=bufanliang@sina.com, emailSecond=null, emailThird=null, correspondingAuthor=1, authorType=1, ext={EN=AuthorExt(id=1175114744181702659, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, authorId=1175114744110399489, language=EN, stringName=Fan-liang BU, firstName=Fan-liang, middleName=null, lastName=BU, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
*, address=School of Information Network Security, People's Public Security University of China, Beijing 100038, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1175114744244617220, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, authorId=1175114744110399489, language=CN, stringName=卜凡亮, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
*, address=中国人民公安大学信息网络安全学院, 北京 100038, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1175114743850353657, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, xref=null, ext=[AuthorCompanyExt(id=1175114743858742266, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=School of Information Network Security, People's Public Security University of China, Beijing 100038, China), AuthorCompanyExt(id=1175114743862936571, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=中国人民公安大学信息网络安全学院, 北京 100038)])]), Author(id=1175114744299143174, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, orderNo=2, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1175114744349474824, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, authorId=1175114744299143174, language=EN, stringName=Fang-hao ZHONG, firstName=Fang-hao, middleName=null, lastName=ZHONG, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=null, address=School of Information Network Security, People's Public Security University of China, Beijing 100038, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1175114744416583689, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, 
authorId=1175114744299143174, language=CN, stringName=钟方昊, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=null, address=中国人民公安大学信息网络安全学院, 北京 100038, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1175114743850353657, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, xref=null, ext=[AuthorCompanyExt(id=1175114743858742266, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=School of Information Network Security, People's Public Security University of China, Beijing 100038, China), AuthorCompanyExt(id=1175114743862936571, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=中国人民公安大学信息网络安全学院, 北京 100038)])])], keywords=[Keyword(id=1175114744554995722, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, orderNo=1, keyword=voice-to-face generation), Keyword(id=1175114744638881803, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, orderNo=2, keyword=Mel frequency cepstral coefficients), Keyword(id=1175114744697602060, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, orderNo=3, keyword=style generative adversarial networks), Keyword(id=1175114744747933709, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, orderNo=4, keyword=attention mechanism), Keyword(id=1175114744802459662, 
tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, orderNo=1, keyword=语音生成人脸), Keyword(id=1175114744856985615, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, orderNo=2, keyword=梅尔频率倒谱系数), Keyword(id=1175114744907317264, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, orderNo=3, keyword=样式生成对抗网络), Keyword(id=1175114744957648913, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, orderNo=4, keyword=注意力机制)], refs=[Reference(id=1175114747662975030, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2002, volume=5, issue=4, pageStart=356, pageEnd=363, url=null, language=null, rfNumber=[1], rfOrder=0, authorNames=Lewicki M S, journalName=Nature Neuroscience, refType=null, unstructuredReference=
Lewicki M S. Efficient coding of natural sounds[J].
Nature Neuroscience,
2002,
5(4): 356-363., articleTitle=Efficient coding of natural sounds, refAbstract=null), Reference(id=1175114747721695287, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2011, volume=null, issue=null, pageStart=689, pageEnd=696, url=null, language=null, rfNumber=[2], rfOrder=1, authorNames=Ngiam J, Khosla A, Kim M, journalName=Proceedings of the 28th International Conference on Machine Learning (ICML-11), refType=null, unstructuredReference=
Ngiam J,
Khosla A,
Kim M, et al. Multimodal deep learning[C]//
Proceedings of the 28th International Conference on Machine Learning (ICML-11). Washington: International Conference on Machine Learning,
2011: 689-696., articleTitle=Multimodal deep learning, refAbstract=null), Reference(id=1175114747788804152, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2018, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[3], rfOrder=2, authorNames=Owens A, Efros A A, journalName=ArXiv, refType=null, unstructuredReference=
Owens A,
Efros A A. Audio-visual scene analysis with self-supervised multisensory features[J].
ArXiv,
2018: 1804.03641., articleTitle=Audio-visual scene analysis with self-supervised multisensory features, refAbstract=null), Reference(id=1175114747843330105, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2017, volume=null, issue=null, pageStart=609, pageEnd=617, url=null, language=null, rfNumber=[4], rfOrder=3, authorNames=Arandjelovic R, Zisserman A, journalName=Proceedings of the IEEE International Conference on Computer Vision (ICCV), refType=null, unstructuredReference=
Arandjelovic R,
Zisserman A. Look, listen and learn[C]//
Proceedings of the IEEE International Conference on Computer Vision (ICCV). Venice: IEEE,
2017: 609-617., articleTitle=Look, listen and learn, refAbstract=null), Reference(id=1175114747906244666, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[5], rfOrder=4, authorNames=Zhou Y, Wang Z, Fang C, journalName=ArXiv, refType=null, unstructuredReference=
Zhou Y,
Wang Z,
Fang C, et al. Talking face generation by adversarially disentangled audio-visual representation[J].
ArXiv,
2019: 1807.07860., articleTitle=Talking face generation by adversarially disentangled audio-visual representation, refAbstract=null), Reference(id=1175114748032073787, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2014, volume=null, issue=null, pageStart=2672, pageEnd=2680, url=null, language=null, rfNumber=[6], rfOrder=5, authorNames=Goodfellow I, Pouget-Abadie J, Mirza M, journalName=Advances in Neural Information Processing Systems, refType=null, unstructuredReference=
Goodfellow I,
Pouget-Abadie J,
Mirza M, et al. Generative adversarial nets[C]//
Advances in Neural Information Processing Systems. Montreal: ACM,
2014: 2672-2680., articleTitle=Generative adversarial nets, refAbstract=null), Reference(id=1175114748103376956, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2016, volume=null, issue=null, pageStart=87, pageEnd=103, url=null, language=null, rfNumber=[7], rfOrder=6, authorNames=Chung J S, Zisserman A, journalName=Asian Conference on Computer Vision. Berlin:Springer, refType=null, unstructuredReference=
Chung J S,
Zisserman A. Lip reading in the wild[C]//
Asian Conference on Computer Vision. Berlin:Springer,
2016: 87-103., articleTitle=Lip reading in the wild, refAbstract=null), Reference(id=1175114748178874429, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2024, volume=24, issue=7, pageStart=2804, pageEnd=2812, url=null, language=null, rfNumber=[8], rfOrder=7, authorNames=李俊屿, 卜凡亮, 谭林, journalName=科学技术与工程, refType=null, unstructuredReference=李俊屿, 卜凡亮, 谭林, 等. 基于多模态共享网络的自监督语音-人脸跨模态关联学习方法[J].
科学技术与工程,
2024,
24(7): 2804-2812., articleTitle=基于多模态共享网络的自监督语音-人脸跨模态关联学习方法, refAbstract=null), Reference(id=1175114748229206078, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2024, volume=24, issue=7, pageStart=2804, pageEnd=2812, url=null, language=null, rfNumber=[8], rfOrder=8, authorNames=Li Junyu, Bu Fanliang, Tan Lin, journalName=Science Technology and Engineering, refType=null, unstructuredReference=
Li Junyu,
Bu Fanliang,
Tan Lin, et al. Self-supervised voice-face cross-modal association learning method
via multi-modal shared network[J].
Science Technology and Engineering,
2024,
24(7): 2804-2812., articleTitle=Self-supervised voice-face cross-modal association learning method
via multi-modal shared network, refAbstract=null), Reference(id=1175114748296314943, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2004, volume=28, issue=4, pageStart=239, pageEnd=262, url=null, language=null, rfNumber=[9], rfOrder=9, authorNames=Smith E R, Zaidel D W, journalName=Journal of Nonverbal Behavior, refType=null, unstructuredReference=
Smith E R,
Zaidel D W. Facial and vocal cues in perception of trustworthiness[J].
Journal of Nonverbal Behavior,
2004,
28(4): 239-262., articleTitle=Facial and vocal cues in perception of trustworthiness, refAbstract=null), Reference(id=1175114748359229504, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2015, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[10], rfOrder=10, authorNames=Radford A, Metz L, Chintala S, journalName=ArXiv Preprint ArXiv, refType=null, unstructuredReference=
Radford A,
Metz L,
Chintala S. Unsupervised representation learning with deep convolutional generative adversarial networks[J].
ArXiv Preprint ArXiv,
2015: 1511.06434., articleTitle=Unsupervised representation learning with deep convolutional generative adversarial networks, refAbstract=null), Reference(id=1175114748417949761, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2018, volume=null, issue=null, pageStart=8427, pageEnd=8436, url=null, language=null, rfNumber=[11], rfOrder=11, authorNames=Nagrani A, Zisserman A, journalName=Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), refType=null, unstructuredReference=
Nagrani A,
Zisserman A. Seeing voices and hearing faces: cross-modal biometric matching[C]//
Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Salt Lake City: IEEE,
2018: 8427-8436., articleTitle=Seeing voices and hearing faces: cross-modal biometric matching, refAbstract=null), Reference(id=1175114748493447234, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[12], rfOrder=12, authorNames=Duarte A, Hidalgo G, Lopes A T, journalName=ArXiv Preprint ArXiv, refType=null, unstructuredReference=
Duarte A,
Hidalgo G,
Lopes A T, et al. Wav2Pix: speech-conditioned face generation using generative adversarial networks[J].
ArXiv Preprint ArXiv,
2019: 1901.03396., articleTitle=Wav2Pix: speech-conditioned face generation using generative adversarial networks, refAbstract=null), Reference(id=1175114748547973187, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2016, volume=null, issue=null, pageStart=499, pageEnd=515, url=null, language=null, rfNumber=[13], rfOrder=13, authorNames=Wen Y, Zhang K, Li Z, journalName=European Conference on Computer Vision (ECCV). Berlin: Springer, Cham, refType=null, unstructuredReference=
Wen Y,
Zhang K,
Li Z, et al. A discriminative feature learning approach for deep face recognition[C]//
European Conference on Computer Vision (ECCV). Berlin: Springer, Cham,
2016: 499-515., articleTitle=A discriminative feature learning approach for deep face recognition, refAbstract=null), Reference(id=1175114748598304836, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2017, volume=null, issue=null, pageStart=2242, pageEnd=2251, url=null, language=null, rfNumber=[14], rfOrder=14, authorNames=Zhu J Y, Park T, Isola P, journalName=Proceedings of the IEEE International Conference on Computer Vision (ICCV). Venice, refType=null, unstructuredReference=
Zhu J Y,
Park T,
Isola P, et al. Unpaired image-to-image translation using cycle-consistent adversarial networks[C]//
Proceedings of the IEEE International Conference on Computer Vision (ICCV). Venice, Italy: IEEE,
2017: 2242-2251., articleTitle=Unpaired image-to-image translation using cycle-consistent adversarial networks, refAbstract=null), Reference(id=1175114748648636485, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2017, volume=36, issue=null, pageStart=95, pageEnd=null, url=null, language=null, rfNumber=[15], rfOrder=15, authorNames=Suwajanakorn S, Seitz S M, Kemelmacher-Shlizerman I, journalName=ACM Transactions on Graphics, refType=null, unstructuredReference=
Suwajanakorn S,
Seitz S M,
Kemelmacher-Shlizerman I. Synthesizing obama: learning lip sync from audio[J].
ACM Transactions on Graphics,
2017,
36: 95., articleTitle=Synthesizing obama: learning lip sync from audio, refAbstract=null), Reference(id=1175114748703162438, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2020, volume=20, issue=1, pageStart=252, pageEnd=257, url=null, language=null, rfNumber=[16], rfOrder=16, authorNames=张珂, 侯捷, journalName=科学技术与工程, refType=null, unstructuredReference=张珂, 侯捷. 基于改进的卷积神经网络图像识别方法[J].
科学技术与工程,
2020,
20(1): 252-257., articleTitle=基于改进的卷积神经网络图像识别方法, refAbstract=null), Reference(id=1175114748774465607, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2020, volume=20, issue=1, pageStart=252, pageEnd=257, url=null, language=null, rfNumber=[16], rfOrder=17, authorNames=Zhang Ke, Hou Jie, journalName=Science Technology and Engineering, refType=null, unstructuredReference=
Zhang Ke,
Hou Jie. Research on image recognition method based on improved convolution neural network[J].
Science Technology and Engineering,
2020,
20(1): 252-257., articleTitle=Research on image recognition method based on improved convolution neural network, refAbstract=null), Reference(id=1175114748833185864, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=43, issue=12, pageStart=4401, pageEnd=4410, url=null, language=null, rfNumber=[17], rfOrder=18, authorNames=Karras T, Laine S, Aila T, journalName=IEEE Transactions on Pattern Analysis and Machine Intelligence, refType=null, unstructuredReference=
Karras T,
Laine S,
Aila T. A style-based generator architecture for generative adversarial networks[J].
IEEE Transactions on Pattern Analysis and Machine Intelligence,
2019,
43(12): 4401-4410., articleTitle=A style-based generator architecture for generative adversarial networks, refAbstract=null), Reference(id=1175114748887711817, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2020, volume=20, issue=13, pageStart=5217, pageEnd=5223, url=null, language=null, rfNumber=[18], rfOrder=19, authorNames=刘遵雄, 蒋中慧, 任行乐, journalName=科学技术与工程, refType=null, unstructuredReference=刘遵雄, 蒋中慧, 任行乐. 多尺度生成对抗网络的图像超分辨率算法[J].
科学技术与工程,
2020,
20(13): 5217-5223., articleTitle=多尺度生成对抗网络的图像超分辨率算法, refAbstract=null), Reference(id=1175114748938043466, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2020, volume=20, issue=13, pageStart=5217, pageEnd=5223, url=null, language=null, rfNumber=[18], rfOrder=20, authorNames=Liu Zunxiong, Jiang Zhonghui, Ren Xingle, journalName=Science Technology and Engineering, refType=null, unstructuredReference=
Liu Zunxiong,
Jiang Zhonghui,
Ren Xingle. Image super-resolution algorithm
via multi-scale generative adversarial networks[J].
Science Technology and Engineering,
2020,
20(13): 5217-5223., articleTitle=Image super-resolution algorithm
via multi-scale generative adversarial networks, refAbstract=null), Reference(id=1175114748992569419, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2020, volume=null, issue=null, pageStart=8110, pageEnd=8119, url=null, language=null, rfNumber=[19], rfOrder=21, authorNames=Karras T, Laine S, Aittala M, journalName=Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. Seattle, IEEE, refType=null, unstructuredReference=
Karras T,
Laine S,
Aittala M, et al. Analyzing and improving the image quality of StyleGAN[C]//
Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. Seattle, IEEE,
2020: 8110-8119., articleTitle=Analyzing and improving the image quality of StyleGAN, refAbstract=null), Reference(id=1175114749051289676, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2010, volume=11, issue=8, pageStart=599, pageEnd=605, url=null, language=null, rfNumber=[20], rfOrder=22, authorNames=Kraus N, Chandrasekaran B, journalName=Nature Reviews Neuroscience, refType=null, unstructuredReference=
Kraus N,
Chandrasekaran B. Music training for the development of auditory skills[J].
Nature Reviews Neuroscience,
2010,
11(8): 599-605., articleTitle=Music training for the development of auditory skills, refAbstract=null), Reference(id=1175114749110009933, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2003, volume=13, issue=19, pageStart=1709, pageEnd=1714, url=null, language=null, rfNumber=[21], rfOrder=23, authorNames=Kamachi M, Hill H, Lander K, journalName=Current Biology, refType=null, unstructuredReference=
Kamachi M,
Hill H,
Lander K, et al. Putting the face to the voice: matching identity across modality[J].
Current Biology,
2003,
13(19): 1709-1714., articleTitle=Putting the face to the voice: matching identity across modality, refAbstract=null), Reference(id=1175114749172924494, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=41, issue=2, pageStart=423, pageEnd=443, url=null, language=null, rfNumber=[22], rfOrder=24, authorNames=Baltrušaitis T, Ahuja C, Morency L P, journalName=IEEE Transactions on Pattern Analysis and Machine Intelligence, refType=null, unstructuredReference=
Baltrušaitis T,
Ahuja C,
Morency L P. Multimodal machine learning: a survey and taxonomy[J].
IEEE Transactions on Pattern Analysis and Machine Intelligence,
2019,
41(2): 423-443., articleTitle=Multimodal machine learning: a survey and taxonomy, refAbstract=null), Reference(id=1175114749223256143, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2008, volume=32, issue=3, pageStart=496, pageEnd=512, url=null, language=null, rfNumber=[23], rfOrder=25, authorNames=Kamachi M, Hill H, Johnston A, journalName=Cognitive Science, refType=null, unstructuredReference=
Kamachi M,
Hill H,
Johnston A. This face sounds familiar: auditory face recognition[J].
Cognitive Science,
2008,
32(3): 496-512., articleTitle=This face sounds familiar: auditory face recognition, refAbstract=null), Reference(id=1175114749281976400, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=7, issue=null, pageStart=115141, pageEnd=115150, url=null, language=null, rfNumber=[24], rfOrder=26, authorNames=Kim Y, Morikawa C, Hori T, journalName=IEEE Access, refType=null, unstructuredReference=
Kim Y,
Morikawa C,
Hori T. DeepVoice: a new deep learning approach for voice-based emotion recognition[J].
IEEE Access,
2019,
7: 115141-115150., articleTitle=DeepVoice: a new deep learning approach for voice-based emotion recognition, refAbstract=null), Reference(id=1175114749340696657, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[25], rfOrder=27, authorNames=Smith E, Zhang S, Johnson S, journalName=ArXiv, refType=null, unstructuredReference=
Smith E,
Zhang S,
Johnson S, et al. Cross-modal perceptionist: can face geometry be gleaned from voices[J].
ArXiv,
2019: 1909.04315., articleTitle=Cross-modal perceptionist: can face geometry be gleaned from voices, refAbstract=null), Reference(id=1175114749395222610, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2014, volume=22, issue=10, pageStart=1533, pageEnd=1545, url=null, language=null, rfNumber=[26], rfOrder=28, authorNames=Abdel-Hamid O, Mohamed A R, Jiang H, journalName=IEEE/ACM Transactions on Audio, Speech, and Language Processing, refType=null, unstructuredReference=
Abdel-Hamid O,
Mohamed A R,
Jiang H, et al. Convolutional neural networks for speech recognition[J].
IEEE/ACM Transactions on Audio, Speech, and Language Processing,
2014,
22(10): 1533-1545., articleTitle=Convolutional neural networks for speech recognition, refAbstract=null), Reference(id=1175114749479108691, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=19, issue=1, pageStart=177, pageEnd=182, url=null, language=null, rfNumber=[27], rfOrder=29, authorNames=冯陈定, 李少波, 姚勇, journalName=科学技术与工程, refType=null, unstructuredReference=冯陈定, 李少波, 姚勇, 等. 基于改进卷积神经网络与动态衰减学习率的环境声音识别算法[J].
科学技术与工程,
2019,
19(1): 177-182., articleTitle=基于改进卷积神经网络与动态衰减学习率的环境声音识别算法, refAbstract=null), Reference(id=1175114749537828948, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2019, volume=19, issue=1, pageStart=177, pageEnd=182, url=null, language=null, rfNumber=[27], rfOrder=30, authorNames=Feng Chending, Li Shaobo, Yao Yong, journalName=Science Technology and Engineering, refType=null, unstructuredReference=
Feng Chending,
Li Shaobo,
Yao Yong, et al. Environmental sound recognition with improving convolutional neural networks and learning rate decay[J].
Science Technology and Engineering,
2019,
19(1): 177-182., articleTitle=Environmental sound recognition with improving convolutional neural networks and learning rate decay, refAbstract=null), Reference(id=1175114749588160597, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2015, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[28], rfOrder=31, authorNames=Radford A, Metz L, Chintala S, journalName=ArXiv, refType=null, unstructuredReference=
Radford A,
Metz L,
Chintala S. Unsupervised representation learning with deep convolutional generative adversarial networks[J].
ArXiv,
2015: 1511.06434., articleTitle=Unsupervised representation learning with deep convolutional generative adversarial networks, refAbstract=null), Reference(id=1175114749655269462, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2013, volume=null, issue=null, pageStart=6645, pageEnd=6649, url=null, language=null, rfNumber=[29], rfOrder=32, authorNames=Graves A, Mohamed A R, Hinton G, journalName=2013 IEEE International Conference on Acoustics, Speech and Signal Processing, refType=null, unstructuredReference=
Graves A,
Mohamed A R,
Hinton G. Speech recognition with deep recurrent neural networks[C]//
2013 IEEE International Conference on Acoustics, Speech and Signal Processing. Vancouver: IEEE,
2013: 6645-6649., articleTitle=Speech recognition with deep recurrent neural networks, refAbstract=null), Reference(id=1175114749713989719, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, doi=null, pmid=null, pmcid=null, year=2016, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[30], rfOrder=33, authorNames=Chan W, Jaitly N, Le Q, journalName=ArXiv, refType=null, unstructuredReference=
Chan W,
Jaitly N,
Le Q, et al. Listen, attend and spell[J].
ArXiv,
2016: 1508.01211., articleTitle=Listen, attend and spell, refAbstract=null)], funds=null, companyList=[AuthorCompany(id=1175114743850353657, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, xref=null, ext=[AuthorCompanyExt(id=1175114743858742266, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=School of Information Network Security, People's Public Security University of China, Beijing 100038, China), AuthorCompanyExt(id=1175114743862936571, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, companyId=1175114743850353657, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=中国人民公安大学信息网络安全学院, 北京 100038)])], figs=[ArticleFig(id=1175114745112838162, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.1, caption=
StyleGAN generator structure, figureFileSmall=Ry/Zy8KeE+FoDCK2rE4jbQ==, figureFileBig=0S/JubuFtx4XJY8auN94KA==, tableContent=null), ArticleFig(id=1175114745192529939, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图1, caption=
StyleGAN生成器结构, figureFileSmall=Ry/Zy8KeE+FoDCK2rE4jbQ==, figureFileBig=0S/JubuFtx4XJY8auN94KA==, tableContent=null), ArticleFig(id=1175114745272221716, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.2, caption=
StyleGAN2 generator structure, figureFileSmall=6sEgBNC5dYsM4PPncNcUFg==, figureFileBig=T1XfMzKHbQ5DW5wuKJGneQ==, tableContent=null), ArticleFig(id=1175114745339330581, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图2, caption=
StyleGAN2生成器结构, figureFileSmall=6sEgBNC5dYsM4PPncNcUFg==, figureFileBig=T1XfMzKHbQ5DW5wuKJGneQ==, tableContent=null), ArticleFig(id=1175114745402245142, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.3, caption=
MFCC feature extraction, figureFileSmall=/KRW4+K0LSqfFs0k9HEi7Q==, figureFileBig=bhhRio145lWD3vW2kAmghw==, tableContent=null), ArticleFig(id=1175114745481936919, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图3, caption=
MFCC特征提取, figureFileSmall=/KRW4+K0LSqfFs0k9HEi7Q==, figureFileBig=bhhRio145lWD3vW2kAmghw==, tableContent=null), ArticleFig(id=1175114745532268568, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.4, caption=
Improved ResNet18 residual block, figureFileSmall=433m9Su6g5rRins6MQzMBg==, figureFileBig=J4MuxzADR8GtBp98y2bPKw==, tableContent=null), ArticleFig(id=1175114745599377433, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图4, caption=
改进后的ResNet18残差块, figureFileSmall=433m9Su6g5rRins6MQzMBg==, figureFileBig=J4MuxzADR8GtBp98y2bPKw==, tableContent=null), ArticleFig(id=1175114745662291994, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.5, caption=
Structure of the SE module, figureFileSmall=l/nB70XeU0RArYy/ZGRFTg==, figureFileBig=Hyt8FhE2lB9lmeEpKdc58w==, tableContent=null), ArticleFig(id=1175114745721012251, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图5, caption=
SE模块的结构, figureFileSmall=l/nB70XeU0RArYy/ZGRFTg==, figureFileBig=Hyt8FhE2lB9lmeEpKdc58w==, tableContent=null), ArticleFig(id=1175114745788121116, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.6, caption=
Improved overall framework, figureFileSmall=Be5Q4d0nX/9d82EwfdT4Pg==, figureFileBig=w3UPBvCo7UWNBulLl7+ctQ==, tableContent=null), ArticleFig(id=1175114745846841373, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图6, caption=
改进后的整体框架, figureFileSmall=Be5Q4d0nX/9d82EwfdT4Pg==, figureFileBig=w3UPBvCo7UWNBulLl7+ctQ==, tableContent=null), ArticleFig(id=1175114745897173022, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.7, caption=
Discriminator loss of generated data, figureFileSmall=7+l1K9B2Xg9nsj36F+VSTw==, figureFileBig=R2bV6/9ZTIA9nuE/uIVGpQ==, tableContent=null), ArticleFig(id=1175114745955893279, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图7, caption=
生成数据的判别器损失, figureFileSmall=7+l1K9B2Xg9nsj36F+VSTw==, figureFileBig=R2bV6/9ZTIA9nuE/uIVGpQ==, tableContent=null), ArticleFig(id=1175114746023002144, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.8, caption=
Discriminator loss of real data, figureFileSmall=RfPDbNK+qWMHkG44OzQ+Mw==, figureFileBig=zajOAmjmEC2RLxq27Isb8g==, tableContent=null), ArticleFig(id=1175114746069139489, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图8, caption=
真实数据的判别器损失, figureFileSmall=RfPDbNK+qWMHkG44OzQ+Mw==, figureFileBig=zajOAmjmEC2RLxq27Isb8g==, tableContent=null), ArticleFig(id=1175114746127859746, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.9, caption=
Generator loss of the generated data passed through the discriminator, figureFileSmall=oWVO64ut1JjLyN1PqL0B2w==, figureFileBig=OPS3KED7OcxEvtG97Lczvw==, tableContent=null), ArticleFig(id=1175114746203357219, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图9, caption=
生成数据通过判别器的生成器损失, figureFileSmall=oWVO64ut1JjLyN1PqL0B2w==, figureFileBig=OPS3KED7OcxEvtG97Lczvw==, tableContent=null), ArticleFig(id=1175114746266271780, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.10, caption=
Generator loss of the generated data passed through the classifier, figureFileSmall=rk3EXLN7HCOTFXldb8aT6g==, figureFileBig=y5WiI8Ved6BU/F8Yj4zg/Q==, tableContent=null), ArticleFig(id=1175114746329186341, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图10, caption=
生成数据通过分类器的生成器损失, figureFileSmall=rk3EXLN7HCOTFXldb8aT6g==, figureFileBig=y5WiI8Ved6BU/F8Yj4zg/Q==, tableContent=null), ArticleFig(id=1175114746396295206, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.11, caption=
Trends in FID during training, figureFileSmall=haex/fSAIqHlS6vQnKg02Q==, figureFileBig=cQOVma/326j5xnuoB5kKPw==, tableContent=null), ArticleFig(id=1175114746471792679, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图11, caption=
训练过程中的FID变化趋势, figureFileSmall=haex/fSAIqHlS6vQnKg02Q==, figureFileBig=cQOVma/326j5xnuoB5kKPw==, tableContent=null), ArticleFig(id=1175114746522124328, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.12, caption=
Trends in Path Length during training, figureFileSmall=CqbjhGneAwEnpWpOjexytw==, figureFileBig=Fo4sCagWjNpynurRV6ur4Q==, tableContent=null), ArticleFig(id=1175114746631176233, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图12, caption=
训练过程中的Path Length变化趋势, figureFileSmall=CqbjhGneAwEnpWpOjexytw==, figureFileBig=Fo4sCagWjNpynurRV6ur4Q==, tableContent=null), ArticleFig(id=1175114746706673706, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.13, caption=
The results of the generation of different models and real samples, figureFileSmall=2j5GGnSwstOesIRYii0eGg==, figureFileBig=XYP5HrvOcFJ2YkJHF3Qrhg==, tableContent=null), ArticleFig(id=1175114746777976875, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图13, caption=
不同模型的生成结果和真实样本, figureFileSmall=2j5GGnSwstOesIRYii0eGg==, figureFileBig=XYP5HrvOcFJ2YkJHF3Qrhg==, tableContent=null), ArticleFig(id=1175114746832502828, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.14, caption=
Trends in FID during training, figureFileSmall=BxQwt5UFL1B2h4YKKO9WhQ==, figureFileBig=D5RM0SAM7N14svZhxs2iig==, tableContent=null), ArticleFig(id=1175114746912194605, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图14, caption=
训练过程中的FID变化趋势, figureFileSmall=BxQwt5UFL1B2h4YKKO9WhQ==, figureFileBig=D5RM0SAM7N14svZhxs2iig==, tableContent=null), ArticleFig(id=1175114746979303470, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Fig.15, caption=
Trends in Path Length during training, figureFileSmall=6IkM79Zx3/jpUku7nZq2ig==, figureFileBig=9az4FERC+GaCSwjnHe6aRw==, tableContent=null), ArticleFig(id=1175114747042218031, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=图15, caption=
训练过程中的Path Length变化趋势, figureFileSmall=6IkM79Zx3/jpUku7nZq2ig==, figureFileBig=9az4FERC+GaCSwjnHe6aRw==, tableContent=null), ArticleFig(id=1175114747096743984, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Table 1, caption=
Statistics of the datasets used in experiments
, figureFileSmall=null, figureFileBig=null, tableContent=
| Statistic | Training set | Validation set | Test set | Total |
| Speech segments | 113 322 | 14 182 | 21 850 | 149 354 |
| Face images | 106 584 | 12 533 | 20 455 | 139 572 |
| Number of subjects | 924 | 112 | 189 | 1 225 |
), ArticleFig(id=1175114747230961713, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=表1, caption=
实验中使用的数据集的统计
, figureFileSmall=null, figureFileBig=null, tableContent=
| 统计项目 | 训练集 | 验证集 | 测试集 | 合计 |
| 语音段数 | 113 322 | 14 182 | 21 850 | 149 354 |
| 人脸图片 | 106 584 | 12 533 | 20 455 | 139 572 |
| 受试者数量 | 924 | 112 | 189 | 1 225 |
), ArticleFig(id=1175114747285487666, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Table 2, caption=
Results of evaluation indicators for different models
, figureFileSmall=null, figureFileBig=null, tableContent=
| Model | FID | Path Length |
| DCGAN | 179.5 | 343 |
| StyleGAN | 68.2 | 207 |
| StyleGAN2 | 28.9 | 131 |
), ArticleFig(id=1175114747335819315, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=表2, caption=
不同模型的评价指标结果
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | FID | Path Length |
| DCGAN | 179.5 | 343 |
| StyleGAN | 68.2 | 207 |
| StyleGAN2 | 28.9 | 131 |
), ArticleFig(id=1175114747394539572, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=EN, label=Table 3, caption=
Results of ablation experiments
, figureFileSmall=null, figureFileBig=null, tableContent=
| Ablation experiment | FID | Path Length |
| BaseModel-NoSE | 34 | 154 |
| BaseModel-ReLU | 42 | 178 |
| BaseModel | 28.9 | 131 |
), ArticleFig(id=1175114747453259829, tenantId=1146029695717560320, journalId=1146123166801305609, articleId=1149773879670960988, language=CN, label=表3, caption=
消融实验的结果
, figureFileSmall=null, figureFileBig=null, tableContent=
| 消融实验 | FID | Path Length |
| BaseModel-NoSE | 34 | 154 |
| BaseModel-ReLU | 42 | 178 |
| BaseModel | 28.9 | 131 |
)], attaches=null, journal=Journal(id=1146119176004939786, delFlag=0, nameCn=科学技术与工程, nameEn=Science Technology and Engineering, nameHistory1=null, nameHistory2=null, issn=1671-1815, eissn=, cn=11-4688/T, coden=null, periodic=4, language=CN, oaType=是, ccby=null, superviseOffice=null, ownerOffice=null, pubOffice=null, editorOffice=null, officeType=null, aims=null, clcCode=null, officeProv=null, officeCity=null, officeAddr=null, officeZip=null, officeEmail=null, officePhone=null, editDirector=null, officeDirector=null, officeDirectorPhone=null, officeStaffNum=null, officeEmpNum=null, coverPicUrl=UKU/O7GSka5polgCTkbIIw==, journalPrice=null, startedYear=null, abbrevIsoEn=Sci Technol Eng, journalRemark=null, publicationField=null, createdTime=null, updatedTime=1754445529766, createdBy=null, updatedBy=13701087609, firstLetterCn=S, firstLetterEn=S, subjectCode=Natural Sciences, subjectName=自然科学, subjectCodeEn=Natural Sciences, subjectNameEn=null, picCn=UKU/O7GSka5polgCTkbIIw==, picEn=5hwlULoNwcbj3xUmVi9MAQ==, jcr=null, cjcr=null, exts=[JournalExt(id=1159791870395564357, language=CN, name=科学技术与工程, nameHistory1=null, nameHistory2=null, managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=null, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=http://www.stae.com.cn/jsygc/home, createdTime=1754445529793, updatedTime=1754445529793, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=http://www.stae.com.cn/jsygc/site/menus/20090429150146001, submissionAuthorUrl=http://www.stae.com.cn/jsygc/author/login, submissionEditorUrl=http://www.stae.com.cn/jsygc/editor/login, submissionReviewUrl=http://www.stae.com.cn/jsygc/reviewer/login, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""}), JournalExt(id=1159791870441701702, language=EN, name=Science Technology and Engineering, nameHistory1=null, nameHistory2=null, 
managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=null, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=http://www.stae.com.cn/jsygc/home, createdTime=1754445529804, updatedTime=1754445529804, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=, submissionAuthorUrl=http://www.stae.com.cn/jsygc/author/login, submissionEditorUrl=http://www.stae.com.cn/jsygc/editor/login, submissionReviewUrl=http://www.stae.com.cn/jsygc/reviewer/login, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""})], databaseList=null, tenantJournalId=1146123166801305609, websiteList=[Website(id=1148243202391400884, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1146123166801305609, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/kxjsygc/CN, language=CN, createTime=1751692112777, createBy=18614031015, updateTime=1753520965431, updateBy=18614031015, name=科学技术与工程-中文站点, tplId=1146099689490845704, title=科学技术与工程, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1148622798802673703, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1148243202391400884, code=articleTextType, value=kx, createTime=1751782615614, updateTime=1751782615614, creator=18614031015, updator=18614031015), WebsiteProps(id=1148622798781702180, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1148243202391400884, code=banner, value=null, createTime=1751782615609, updateTime=1751782615609, creator=18614031015, updator=18614031015), WebsiteProps(id=1148622798769119267, tenantId=1146029695717560320, journalId=null, 
journalGroupId=null, siteId=1148243202391400884, code=logo, value=https://castjournals.cast.org.cn/joweb/kjdb/CN/file/pic?fileId=j86gbwi+p0Idkyl5SzIlmQ==, createTime=1751782615606, updateTime=1751782615606, creator=18614031015, updator=18614031015), WebsiteProps(id=1148622798794285094, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1148243202391400884, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/kjdb/CN/file/pic, createTime=1751782615612, updateTime=1751782615612, creator=18614031015, updator=18614031015), WebsiteProps(id=1148622798790090789, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1148243202391400884, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_cn_619/, createTime=1751782615611, updateTime=1751782615611, creator=18614031015, updator=18614031015)]), Website(id=1155914124811976731, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1146123166801305609, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/kxjsygc/EN, language=EN, createTime=1753521003206, createBy=18614031015, updateTime=1753521003206, updateBy=18614031015, name=科学技术与工程-英文站点, tplId=1146101810881728533, title=Science Technology and Engineering, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1155914371227308235, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1155914124811976731, code=articleTextType, value=kx, createTime=1753521061952, updateTime=1753521061952, creator=18614031015, updator=18614031015), WebsiteProps(id=1155914371210531016, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1155914124811976731, code=banner, value=null, 
createTime=1753521061947, updateTime=1753521061947, creator=18614031015, updator=18614031015), WebsiteProps(id=1155914371202142407, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1155914124811976731, code=logo, value=https://castjournals.cast.org.cn/joweb/kjdb/CN/file/pic?fileId=j86gbwi+p0Idkyl5SzIlmQ==, createTime=1753521061945, updateTime=1753521061945, creator=18614031015, updator=18614031015), WebsiteProps(id=1155914371223113930, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1155914124811976731, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/kjdb/CN/file/pic, createTime=1753521061950, updateTime=1753521061950, creator=18614031015, updator=18614031015), WebsiteProps(id=1155914371218919625, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1155914124811976731, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_cn_619/, createTime=1753521061949, updateTime=1753521061949, creator=18614031015, updator=18614031015)])], journalTitle=科学技术与工程, weixinUrl=null, journalUrl=null, iacademicId=null, status=0, seqNo=null, journalTitleEn=Science Technology and Engineering, journalPhotoCn=UKU/O7GSka5polgCTkbIIw==, journalPhotoEn=5hwlULoNwcbj3xUmVi9MAQ==, journalFirstLetter=S, journalRecommend=null, journalNew=null, journalCollection=null, jcrJf=null, cjcrJf=null, jcrJfStr=null, cjcrJfStr=null, submissionFirstDecision=null, sciSubjectClassification=null, casSubjectClassification=null, citeScore=null, totalCitationFrequency=null, icpCode=null, psCode=null, advertisingLicenseCode=null, copyrightInformation=null, country=null, option=null, provinceCode=null, provinceName=null, collectFlag=false), detailUrlCn=https://castjournals.cast.org.cn/joweb/kxjsygc/CN/10.12404/j.issn.1671-1815.2403342, detailUrlEn=https://castjournals.cast.org.cn/joweb/kxjsygc/EN/10.12404/j.issn.1671-1815.2403342, 
pdfUrlCn=https://castjournals.cast.org.cn/joweb/kxjsygc/CN/PDF/10.12404/j.issn.1671-1815.2403342, pdfUrlEn=https://castjournals.cast.org.cn/joweb/kxjsygc/EN/PDF/10.12404/j.issn.1671-1815.2403342, aliStartDate=null, aliEndDate=null, collectionFlag=false, citedCount=null, citedUrl=null, reference=null)