Article(id=1251480540524790125, tenantId=1146029695717560320, journalId=1251234078029037663, issueId=1251480531381207309, articleNumber=null, orderNo=null, doi=10.11887/j.issn.1001-2486.25050003, pmid=null, cstr=null, oa=null, hot=null, price=null, onlineType=0, articleFormat=0, articleType=null, articleTypeStr=null, receivedDate=1746460800000, receivedDateStr=2025-05-06, revisedDate=null, revisedDateStr=null, acceptedDate=null, acceptedDateStr=null, onlineDate=1776305812244, onlineDateStr=2026-04-16, pubDate=1766851200000, pubDateStr=2025-12-28, doiRegisterDate=null, doiRegisterDateStr=null, onlineIssueDate=1776305812244, onlineIssueDateStr=2026-04-16, onlineJustAcceptDate=null, onlineJustAcceptDateStr=null, onlineFirstDate=null, onlineFirstDateStr=null, sourceXml=null, magXml=null, createTime=1776305812244, creator=13701087609, updateTime=1776305812244, updator=13701087609, issue=Issue{id=1251480531381207309, tenantId=1146029695717560320, journalId=1251234078029037663, year='2025', volume='47', issue='6', pageStart='1', pageEnd='306', issueExtLink='null', onlineDate='null', pubDate='null', beforeIssueId=null, nextIssueId=null, price=null, status=1, issueComplete=1, articleOrder=1, issueType=1, specialIssue=null, createTime=1776305810065, creator=13701087609, updateTime=1776305899308, updator=13701087609, preIssue=null, nextIssue=null, ext={EN=IssueExt(id=1251480905865446141, tenantId=1146029695717560320, journalId=1251234078029037663, issueId=1251480531381207309, language=EN, specialIssueTitle=, coverIllustrator=null, specialIssueEditor=, specialIssueAbout=), CN=IssueExt(id=1251480905865446142, tenantId=1146029695717560320, journalId=1251234078029037663, issueId=1251480531381207309, language=CN, specialIssueTitle=, coverIllustrator=null, specialIssueEditor=, specialIssueAbout=)}, issueFiles=null}, startPage=71, endPage=80, ext={EN=ArticleExt(id=1251480540877111673, articleId=1251480540524790125, tenantId=1146029695717560320, journalId=1251234078029037663, 
language=EN, title=Memory optimization method for control flow computation graph, columnId=1251480536670220899, journalTitle=Journal of National University of Defense Technology, columnName=Computer System and technology, runingTitle=null, highlight=null, articleAbstract=
AI chips face on-chip memory limits in deep learning. Current optimization methods focus on static computation graphs, leaving room to improve memory efficiency for dynamic graphs. To overcome this limitation, a memory optimization framework for control-flow computation graphs was developed. The framework realized operator-level memory reuse within subgraphs and further achieved recursive reuse across subgraphs by exploiting control-flow characteristics. In addition, a ping-pong buffering strategy for weight data was introduced to mitigate the memory wall between on-chip and off-chip memory, thereby allowing overlapping of memory access and computation operations within subgraphs. Validation on the domestic LUNA AI chip has demonstrated that the proposed framework improves on-chip memory utilization by 5.9% compared with existing methods. Moreover, the strategy effectively alleviates the memory wall problem by reducing data transfer time between on-chip and off-chip memory, resulting in execution efficiency improvements of up to 29%.
, correspAuthors=Kun JING, authorNote=null, correspAuthorsNote=null, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=null, magXml=null, pdfUrl=null, pdf=null, pdfFileSize=null, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=null, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=null, mapNumber=null, authorCompany=null, fund=null, authors=null, authorsList=Xiangqian WANG, Yuhao SHEN, Kun JING, Yafei LYU), CN=ArticleExt(id=1251480545717338698, articleId=1251480540524790125, tenantId=1146029695717560320, journalId=1251234078029037663, language=CN, title=针对控制流计算图的内存优化方法, columnId=1251480538381496943, journalTitle=国防科技大学学报, columnName=计算机系统与技术, runingTitle=null, highlight=null, articleAbstract=
AI芯片在深度学习应用中受限于片上内存容量,当前主流内存优化方法针对静态计算图,对动态计算图的内存优化存在进一步的优化空间。针对该问题,提出一种控制流计算图模型的内存优化框架,在子图内部实现内存复用的基础上,结合控制流特性递归进行子图间的内存复用。针对片上与片外内存的内存墙问题,针对控制流计算图的权重数据提出一种有效的乒乓缓存实现策略,在子图内部实现访存和计算操作的重叠执行。基于国产LUNA AI芯片进行验证,结果表明,该内存优化框架实现了控制流计算图的片上内存优化使用,相比原有方法进一步提升5.9%。该策略有效解决了内存墙问题,减少了片上片外内存的数据传输时间,计算图的执行效率最高提升29%。
, correspAuthors=景琨, authorNote=null, correspAuthorsNote=
, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=9s2SMqzGpRVCDhVgo6Q//w==, magXml=WTOpotmeMSQ2bFsR7tDLXw==, pdfUrl=null, pdf=IsCOERBK0vHwqB8qSeXyzw==, pdfFileSize=2756116, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=txVKJKI0n7BSqsNbg1KYUg==, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=26/XzD/g3OMuRV4HEPdLDQ==, mapNumber=null, authorCompany=null, fund=null, authors=
, authorsList=王向前, 申彧昊, 景琨, 吕亚飞)}, authors=[Author(id=1251480547541860985, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, orderNo=0, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=wangxiangqian@ahu.edu.cn, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1251480547638329987, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480547541860985, language=EN, stringName=Xiangqian WANG, firstName=Xiangqian, middleName=null, lastName=WANG, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1.School of Internet, Anhui University, Hefei 230039, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1251480547738993292, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480547541860985, language=CN, stringName=王向前, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1.安徽大学 互联网学院,安徽 合肥 230039, bio={"content":"
王向前(1985—),男,河南南阳人,副教授,博士,硕士生导师,E-mail:wangxiangqian@ahu.edu.cn
"}, bioImg=null, bioContent=
王向前(1985—),男,河南南阳人,副教授,博士,硕士生导师,E-mail:wangxiangqian@ahu.edu.cn
, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1251480547311174246, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=1., ext=[AuthorCompanyExt(id=1251480547315368550, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.School of Internet, Anhui University, Hefei 230039, China), AuthorCompanyExt(id=1251480547323757159, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.安徽大学 互联网学院,安徽 合肥 230039)])]), Author(id=1251480547873211032, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, orderNo=1, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1251480547999040164, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480547873211032, language=EN, stringName=Yuhao SHEN, firstName=Yuhao, middleName=null, lastName=SHEN, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1.School of Internet, Anhui University, Hefei 230039, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1251480548179395242, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480547873211032, language=CN, stringName=申彧昊, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1.安徽大学 互联网学院,安徽 合肥 230039, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1251480547311174246, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=1., ext=[AuthorCompanyExt(id=1251480547315368550, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.School of Internet, Anhui University, Hefei 230039, China), AuthorCompanyExt(id=1251480547323757159, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.安徽大学 互联网学院,安徽 合肥 230039)])]), Author(id=1251480548271669938, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, orderNo=2, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=jingkun@ahu.edu.cn, emailSecond=null, emailThird=null, correspondingAuthor=1, authorType=1, ext={EN=AuthorExt(id=1251480548380721851, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480548271669938, language=EN, stringName=Kun JING, firstName=Kun, middleName=null, lastName=JING, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, *, address=
1.School of Internet, Anhui University, Hefei 230039, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1251480548477190855, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480548271669938, language=CN, stringName=景琨, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, *, address=
1.安徽大学 互联网学院,安徽 合肥 230039, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1251480547311174246, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=1., ext=[AuthorCompanyExt(id=1251480547315368550, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.School of Internet, Anhui University, Hefei 230039, China), AuthorCompanyExt(id=1251480547323757159, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.安徽大学 互联网学院,安徽 合肥 230039)])]), Author(id=1251480548590437073, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, orderNo=3, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1251480548695294682, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480548590437073, language=EN, stringName=Yafei LYU, firstName=Yafei, middleName=null, lastName=LYU, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2.iFLYTEK Co., Ltd., Hefei 230026, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1251480548808540899, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, authorId=1251480548590437073, language=CN, stringName=吕亚飞, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2.科大讯飞股份有限公司,安徽 合肥 230026, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1251480547424420463, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=2., ext=[AuthorCompanyExt(id=1251480547432809072, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547424420463, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2.iFLYTEK Co., Ltd., Hefei 230026, China), AuthorCompanyExt(id=1251480547441197681, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547424420463, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2.科大讯飞股份有限公司,安徽 合肥 230026)])])], keywords=[Keyword(id=1251480548946952939, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, orderNo=1, keyword=AI chip), Keyword(id=1251480549056004852, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, orderNo=2, keyword=memory optimization), Keyword(id=1251480549173445375, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, orderNo=3, keyword=memory reuse), Keyword(id=1251480549269914375, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, orderNo=4, keyword=cross-memory transfer), Keyword(id=1251480549366383375, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, orderNo=1, keyword=AI芯片), Keyword(id=1251480549462852374, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, orderNo=2, keyword=内存优化), Keyword(id=1251480549550932767, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, orderNo=3, keyword=内存重用), Keyword(id=1251480549639013157, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, orderNo=4, keyword=跨内存传输)], refs=[Reference(id=1251480553975922714, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=121, pageEnd=180, url=null, language=null, rfNumber=[1], rfOrder=0, authorNames=LI B, LI B, journalName=Embedded artificial intelligence, refType=null, unstructuredReference=
LI B.
Embedded AI accelerator chips[M]//
LI B.
Embedded artificial intelligence.Singapore:Springer Nature Singapore Pte Ltd.,
2024:121-180., articleTitle=null, refAbstract=null), Reference(id=1251480554064003105, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2020, volume=6, issue=4, pageStart=56, pageEnd=68, url=null, language=null, rfNumber=[2], rfOrder=1, authorNames=马玮良, 彭轩, 熊倩, journalName=大数据, refType=null, unstructuredReference=马玮良, 彭轩, 熊倩,
等.深度学习中的内存管理问题研究综述[J].
大数据,
2020,
6(4):56-68., articleTitle=深度学习中的内存管理问题研究综述, refAbstract=null), Reference(id=1251480554143694892, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2020, volume=6, issue=4, pageStart=56, pageEnd=68, url=null, language=null, rfNumber=[2], rfOrder=2, authorNames=MA W L, PENG X, XIONG Q, journalName=Big Data Research, refType=null, unstructuredReference=
MA W L,
PENG X,
XIONG Q,
et al.Memory management in deep learning:a survey[J].
Big Data Research,
2020,
6(4):56-68.(in Chinese), articleTitle=Memory management in deep learning:a survey, refAbstract=null), Reference(id=1251480554244358201, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=286, pageEnd=301, url=null, language=null, rfNumber=[3], rfOrder=3, authorNames=XIA C W, ZHAO J C, SUN Q Q, journalName=null, refType=null, unstructuredReference=
XIA C W,
ZHAO J C,
SUN Q Q,
et al.Optimizing deep learning inference via global analysis and tensor expressions[C]//Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems,
2024:286-301., articleTitle=Optimizing deep learning inference via global analysis and tensor expressions, refAbstract=null), Reference(id=1251480554357604422, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2016, volume=null, issue=null, pageStart=2818, pageEnd=2826, url=null, language=null, rfNumber=[4], rfOrder=4, authorNames=SZEGEDY C, VANHOUCKE V, IOFFE S, journalName=null, refType=null, unstructuredReference=
SZEGEDY C,
VANHOUCKE V,
IOFFE S,
et al.Rethinking the inception architecture for computer vision[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR),
2016:2818-2826., articleTitle=Rethinking the inception architecture for computer vision, refAbstract=null), Reference(id=1251480554449879116, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2018, volume=null, issue=null, pageStart=4510, pageEnd=4520, url=null, language=null, rfNumber=[5], rfOrder=5, authorNames=SANDLER M, HOWARD A, ZHU M L, journalName=null, refType=null, unstructuredReference=
SANDLER M,
HOWARD A,
ZHU M L,
et al. MobileNetV2:inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
2018:4510-4520., articleTitle=MobileNetV2:inverted residuals and linear bottlenecks, refAbstract=null), Reference(id=1251480556077269084, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2023, volume=34, issue=1, pageStart=304, pageEnd=315, url=null, language=null, rfNumber=[6], rfOrder=6, authorNames=FANG J R, ZHU Z L, LI S G, journalName=IEEE Transactions on Parallel and Distributed Systems, refType=null, unstructuredReference=
FANG J R,
ZHU Z L,
LI S G,
et al.Parallel training of pretrained models via chunk-based dynamic memory management[J].
IEEE Transactions on Parallel and Distributed Systems,
2023,
34(1):304-315., articleTitle=Parallel training of pretrained models via chunk-based dynamic memory management, refAbstract=null), Reference(id=1251480556190515297, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2016, volume=29, issue=null, pageStart=4125, pageEnd=4133, url=null, language=null, rfNumber=[7], rfOrder=7, authorNames=GRUSLYS A, MUNOS R, DANIHELKA I, journalName=Advances in Neural Information Processing Systems, refType=null, unstructuredReference=
GRUSLYS A,
MUNOS R,
DANIHELKA I,
et al.Memory-efficient backpropagation through time[J].
Advances in Neural Information Processing Systems,
2016,
29:4125-4133., articleTitle=Memory-efficient backpropagation through time, refAbstract=null), Reference(id=1251480556291178602, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[8], rfOrder=8, authorNames=CHEN T Q, LI M, LI Y T, journalName=null, refType=null, unstructuredReference=
CHEN T Q,
LI M,
LI Y T,
et al.MXNet:a flexible and efficient machine learning library for heterogeneous distributed systems[EB/OL].(2015-12-03)[
2025-04-20].
https://arxiv.org/abs/1512.01274?context=cs.MS., articleTitle=MXNet:a flexible and efficient machine learning library for heterogeneous distributed systems, refAbstract=null), Reference(id=1251480556391841904, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[9], rfOrder=9, authorNames=CHEN T Q, XU B, ZHANG C Y, journalName=null, refType=null, unstructuredReference=
CHEN T Q,
XU B,
ZHANG C Y,
et al.Training deep nets with sublinear memory cost[EB/OL].(2016-04-22)[
2025-04-20].
https://arxiv.org/abs/1604.06174., articleTitle=Training deep nets with sublinear memory cost, refAbstract=null), Reference(id=1251480556484116603, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[10], rfOrder=10, authorNames=PISARCHYK Y, LEE J, journalName=null, refType=null, unstructuredReference=
PISARCHYK Y,
LEE J.Efficient memory management for deep neural net inference[EB/OL].(2020-02-16)[
2025-04-20].
https://arxiv.org/abs/2001.03288., articleTitle=Efficient memory management for deep neural net inference, refAbstract=null), Reference(id=1251480556593168513, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2023, volume=59, issue=10, pageStart=75, pageEnd=85, url=null, language=null, rfNumber=[11], rfOrder=11, authorNames=王鑫, 李嘉楠, 韩林, journalName=计算机工程与应用, refType=null, unstructuredReference=王鑫, 李嘉楠, 韩林,
等.面向国产异构平台的OpenMP Offload共享内存访存优化[J].
计算机工程与应用,
2023,
59(10):75-85., articleTitle=面向国产异构平台的OpenMP Offload共享内存访存优化, refAbstract=null), Reference(id=1251480556689637513, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2023, volume=59, issue=10, pageStart=75, pageEnd=85, url=null, language=null, rfNumber=[11], rfOrder=12, authorNames=WANG X, LI J N, HAN L, journalName=Computer Engineering and Applications, refType=null, unstructuredReference=
WANG X,
LI J N,
HAN L,
et al.Optimization of OpenMP Offload shared memory access for domestic heterogeneous platforms[J].
Computer Engineering and Applications,
2023,
59(10):75-85.(in Chinese), articleTitle=Optimization of OpenMP Offload shared memory access for domestic heterogeneous platforms, refAbstract=null), Reference(id=1251480556798689424, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[12], rfOrder=13, authorNames=LEE J Y, CHIRKOV N, IGNASHEVA E, journalName=null, refType=null, unstructuredReference=
LEE J Y,
CHIRKOV N,
IGNASHEVA E,
et al.On-device neural net inference with mobile GPUs[EB/OL].(2019-07-03)[
2025-04-20].
https://arxiv.org/abs/1907.01989., articleTitle=On-device neural net inference with mobile GPUs, refAbstract=null), Reference(id=1251480556903547031, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2022, volume=22, issue=10, pageStart=11, pageEnd=15, url=null, language=null, rfNumber=[13], rfOrder=14, authorNames=许鹏, 宋岩, journalName=单片机与嵌入式系统应用, refType=null, unstructuredReference=许鹏, 宋岩.TFLite-micro内存管理与分配策略的优化[J].
单片机与嵌入式系统应用,
2022,
22(10):11-15., articleTitle=TFLite-micro内存管理与分配策略的优化, refAbstract=null), Reference(id=1251480557004210336, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2022, volume=22, issue=10, pageStart=11, pageEnd=15, url=null, language=null, rfNumber=[13], rfOrder=15, authorNames=XU P, SONG Y, journalName=Microcontrollers &Embedded Systems, refType=null, unstructuredReference=
XU P,
SONG Y. Optimizations of TFLite-micro memory management and allocation policy[J].
Microcontrollers & Embedded Systems,
2022,
22(10):11-15.(in Chinese), articleTitle=Optimizations of TFLite-micro memory management and allocation policy, refAbstract=null), Reference(id=1251480557113262245, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[14], rfOrder=16, authorNames=SEKIYAMA T, IMAMICHI T, IMAI H, journalName=null, refType=null, unstructuredReference=
SEKIYAMA T,
IMAMICHI T,
IMAI H,
et al.Profile-guided memory optimization for deep neural networks[EB/OL]. (2018-04-26)[
2025-04-20].
https://arxiv.org/abs/1804.10001., articleTitle=Profile-guided memory optimization for deep neural networks, refAbstract=null), Reference(id=1251480557226508462, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=46, issue=9, pageStart=1539, pageEnd=1546, url=null, language=null, rfNumber=[15], rfOrder=17, authorNames=曹博钧, 钱入意, 徐远超, journalName=计算机工程与科学, refType=null, unstructuredReference=曹博钧, 钱入意, 徐远超.一种面向计算图的及时内存重用算法[J].
计算机工程与科学,
2024,
46(9):1539-1546., articleTitle=一种面向计算图的及时内存重用算法, refAbstract=null), Reference(id=1251480557339754680, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=46, issue=9, pageStart=1539, pageEnd=1546, url=null, language=null, rfNumber=[15], rfOrder=18, authorNames=CAO B J, QIAN R Y, XU Y C, journalName=Computer Engineering &Science, refType=null, unstructuredReference=
CAO B J,
QIAN R Y,
XU Y C.An urgent memory reuse algorithm for computational graphs[J].
Computer Engineering & Science,
2024,
46(9):1539-1546.(in Chinese), articleTitle=An urgent memory reuse algorithm for computational graphs, refAbstract=null), Reference(id=1251480557423640767, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=1997, volume=9, issue=8, pageStart=1735, pageEnd=1780, url=null, language=null, rfNumber=[16], rfOrder=19, authorNames=HOCHREITER S, SCHMIDHUBER J, journalName=Neural Computation, refType=null, unstructuredReference=
HOCHREITER S,
SCHMIDHUBER J. Long short-term memory[J].
Neural Computation,
1997,
9 (8):1735-1780., articleTitle=Long short-term memory, refAbstract=null), Reference(id=1251480557566247117, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2014, volume=null, issue=null, pageStart=3104, pageEnd=3112, url=null, language=null, rfNumber=[17], rfOrder=20, authorNames=SUTSKEVER I, VINYALS O, LE Q V, journalName=null, refType=null, unstructuredReference=
SUTSKEVER I,
VINYALS O,
LE Q V. Sequence to sequence learning with neural networks[C]//Proceedings of the 28th International Conference on Neural Information Processing Systems,
2014:3104-3112., articleTitle=Sequence to sequence learning with neural networks, refAbstract=null), Reference(id=1251480557683687637, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[18], rfOrder=21, authorNames=TAI K S, SOCHER R, MANNING C D, journalName=null, refType=null, unstructuredReference=
TAI K S,
SOCHER R,
MANNING C D.Improved semantic representations from tree-structured long short-term memory networks[EB/OL].(2015-05-30)[
2025-04-20].
https://arxiv.org/pdf/1503.00075., articleTitle=Improved semantic representations from tree-structured long short-term memory networks, refAbstract=null), Reference(id=1251480557801128159, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2018, volume=null, issue=null, pageStart=420, pageEnd=436, url=null, language=null, rfNumber=[19], rfOrder=22, authorNames=WANG X, YU F, DOU Z Y, journalName=null, refType=null, unstructuredReference=
WANG X,
YU F,
DOU Z Y,
et al.SkipNet:learning dynamic routing in convolutional networks[C]//Proceedings of Computer Vision—ECCV 2018,
2018:420-436., articleTitle=SkipNet:learning dynamic routing in convolutional networks, refAbstract=null), Reference(id=1251480557901791465, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=386, pageEnd=400, url=null, language=null, rfNumber=[20], rfOrder=23, authorNames=NIU W, AGRAWAL G, REN B, journalName=null, refType=null, unstructuredReference=
NIU W,
AGRAWAL G,
REN B.SoD²:statically optimizing dynamic deep neural network execution[C]//Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems,
2024:386-400., articleTitle=SoD²:statically optimizing dynamic deep neural network execution, refAbstract=null), Reference(id=1251480558027620594, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2023, volume=null, issue=null, pageStart=681, pageEnd=699, url=null, language=null, rfNumber=[21], rfOrder=24, authorNames=ZHANG C, MA L X, XUE J L, journalName=null, refType=null, unstructuredReference=
ZHANG C,
MA L X,
XUE J L,
et al.Cocktailer:analyzing and optimizing dynamic control flow in deep learning[C]//Proceedings of the 17th USENIX Symposium on Operating Systems Design and Implementation,
2023:681-699., articleTitle=Cocktailer:analyzing and optimizing dynamic control flow in deep learning, refAbstract=null), Reference(id=1251480558166032637, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=1, pageEnd=6, url=null, language=null, rfNumber=[22], rfOrder=25, authorNames=MA J M, LI X H, WANG Z H, journalName=null, refType=null, unstructuredReference=
MA J M,
LI X H,
WANG Z H,
et al. A holistic functionalization approach to optimizing imperative tensor programs in deep learning[C]//Proceedings of the 61st ACM/IEEE Design Automation Conference,
2024:1-6., articleTitle=A holistic functionalization approach to optimizing imperative tensor programs in deep learning, refAbstract=null), Reference(id=1251480558249918726, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2023, volume=null, issue=null, pageStart=305, pageEnd=324, url=null, language=null, rfNumber=[23], rfOrder=26, authorNames=LAMPROU I, ZHANG Z, DE JUAN J, journalName=null, refType=null, unstructuredReference=
LAMPROU I,
ZHANG Z,
DE JUAN J,
et al.Safe optimized static memory allocation for parallel deep learning[C]//Proceedings of the 6th Machine Learning and Systems,
2023:305-324., articleTitle=Safe optimized static memory allocation for parallel deep learning, refAbstract=null), Reference(id=1251480558342193423, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2022, volume=null, issue=null, pageStart=450, pageEnd=463, url=null, language=null, rfNumber=[24], rfOrder=27, authorNames=WANG Q P, XU M W, JIN C, journalName=null, refType=null, unstructuredReference=
WANG Q P,
XU M W,
JIN C,
et al.Melon:breaking the memory wall for resource-efficient on-device machine learning[C]//Proceedings of the 20th Annual International Conference on Mobile Systems,Applications and Services,
2022:450-463., articleTitle=Melon:breaking the memory wall for resource-efficient on-device machine learning, refAbstract=null), Reference(id=1251480558447051029, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2020, volume=34, issue=4, pageStart=3577, pageEnd=3584, url=null, language=null, rfNumber=[25], rfOrder=28, authorNames=CHENG A C, LIN C H, JUAN D C, journalName=Proceedings of the AAAI Conference on Artificial Intelligence, refType=null, unstructuredReference=
CHENG A C,
LIN C H,
JUAN D C,
et al.InstaNAS:instance-aware neural architecture search[J].
Proceedings of the AAAI Conference on Artificial Intelligence,
2020,
34(4):3577-3584., articleTitle=InstaNAS:instance-aware neural architecture search, refAbstract=null), Reference(id=1251480558547714331, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2020, volume=null, issue=null, pageStart=8550, pageEnd=8559, url=null, language=null, rfNumber=[26], rfOrder=29, authorNames=LI Y W, SONG L, CHEN Y K, journalName=null, refType=null, unstructuredReference=
LI Y W,
SONG L,
CHEN Y K,
et al.Learning dynamic routing for semantic segmentation[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR),
2020:8550-8559., articleTitle=Learning dynamic routing for semantic segmentation, refAbstract=null), Reference(id=1251480558660960548, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025, volume=36, issue=3, pageStart=4246, pageEnd=4266, url=null, language=null, rfNumber=[27], rfOrder=30, authorNames=GUO J F, PHILIP CHEN C L, LIU Z L, journalName=IEEE Transactions on Neural Networks and Learning Systems, refType=null, unstructuredReference=
GUO J F,
PHILIP CHEN C L,
LIU Z L,
et al.Dynamic neural network structure: a review for its theories and applications[J].
IEEE Transactions on Neural Networks and Learning Systems,
2025,
36(3):4246-4266., articleTitle=Dynamic neural network structure: a review for its theories and applications, refAbstract=null), Reference(id=1251480558786789676, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[28], rfOrder=31, authorNames=LE T D, IMAI H, NEGISHI Y, journalName=null, refType=null, unstructuredReference=
LE T D,
IMAI H,
NEGISHI Y,
et al.TFLMS:large model support in TensorFlow by graph rewriting[EB/OL].(2019-10-02)[
2025-04-20].
https://arxiv.org/abs/1807.02037., articleTitle=TFLMS:large model support in TensorFlow by graph rewriting, refAbstract=null), Reference(id=1251480558904230198, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2017, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[29], rfOrder=32, authorNames=MENG C, SUN M, YANG J, journalName=null, refType=null, unstructuredReference=
MENG C,
SUN M,
YANG J,
et al.Training deeper models by GPU memory optimization on TensorFlow[C]//Proceedings of the 31st Conference on Neural Information Processing Systems,
2017., articleTitle=Training deeper models by GPU memory optimization on TensorFlow, refAbstract=null), Reference(id=1251480560468705600, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2025-04-20, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[30], rfOrder=33, authorNames=YANG H M, ZHOU J, FU Y, journalName=null, refType=null, unstructuredReference=
YANG H M,
ZHOU J,
FU Y,
et al.ProTrain:efficient LLM training via memory-aware techniques[EB/OL].(2024-06-12)[
2025-04-20].
https://arxiv.org/abs/2406.08334., articleTitle=ProTrain:efficient LLM training via memory-aware techniques, refAbstract=null), Reference(id=1251480560560980294, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[31], rfOrder=34, authorNames=宋鹤鸣, journalName=智能语音系统加速器设计, refType=null, unstructuredReference=宋鹤鸣.
智能语音系统加速器设计[D].上海:上海交通大学,
2019., articleTitle=null, refAbstract=null), Reference(id=1251480560682615118, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=null, pageEnd=null, url=null, language=null, rfNumber=[31], rfOrder=35, authorNames=SONG H M, journalName=Design of accelerator for voice intelligent system, refType=null, unstructuredReference=
SONG H M.
Design of accelerator for voice intelligent system[D].Shanghai:Shanghai Jiao Tong University,
2019. (in Chinese), articleTitle=null, refAbstract=null)], funds=null, companyList=[AuthorCompany(id=1251480547311174246, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=1., ext=[AuthorCompanyExt(id=1251480547315368550, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.School of Internet, Anhui University, Hefei 230039, China), AuthorCompanyExt(id=1251480547323757159, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547311174246, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1.安徽大学 互联网学院,安徽 合肥 230039)]), AuthorCompany(id=1251480547424420463, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, xref=2., ext=[AuthorCompanyExt(id=1251480547432809072, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547424420463, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2.iFLYTEK Co., Ltd., Hefei 230026, China), AuthorCompanyExt(id=1251480547441197681, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, companyId=1251480547424420463, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2.科大讯飞股份有限公司,安徽 合肥 230026)])], figs=[ArticleFig(id=1251480549790008109, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.1, caption=
Reuse results of different strategies in the computation graph, figureFileSmall=XNh9Zbcyw9j7kR4phfRuZg==, figureFileBig=txVKJKI0n7BSqsNbg1KYUg==, tableContent=null), ArticleFig(id=1251480549899060019, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图1, caption=
计算图不同策略复用结果, figureFileSmall=XNh9Zbcyw9j7kR4phfRuZg==, figureFileBig=txVKJKI0n7BSqsNbg1KYUg==, tableContent=null), ArticleFig(id=1251480551589364554, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Alg.1, caption=
Memory reuse, figureFileSmall=DG/6FddycePKzBkuI4CWVw==, figureFileBig=nVLceM8JMvlB15HyGyPtgA==, tableContent=null), ArticleFig(id=1251480551681639250, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=算法1, caption=
内存复用, figureFileSmall=DG/6FddycePKzBkuI4CWVw==, figureFileBig=nVLceM8JMvlB15HyGyPtgA==, tableContent=null), ArticleFig(id=1251480551778108251, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.2, caption=
Comparison of node reuse optimization results in branch control flow, figureFileSmall=0WDQt4gfROxnLIy0cM0Xzw==, figureFileBig=vr3LOYRDnZDQxw5MFNyFpg==, tableContent=null), ArticleFig(id=1251480551874577252, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图2, caption=
分支控制流节点复用优化结果对比, figureFileSmall=0WDQt4gfROxnLIy0cM0Xzw==, figureFileBig=vr3LOYRDnZDQxw5MFNyFpg==, tableContent=null), ArticleFig(id=1251480551975240552, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Alg.2, caption=
Control flow optimization memory reuse algorithm, figureFileSmall=WeDax/iTHwl8si/qaiTcrw==, figureFileBig=FE1kMlXIkiqd2KHnjJZHBA==, tableContent=null), ArticleFig(id=1251480552063320944, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=算法2, caption=
控制流优化内存复用算法, figureFileSmall=WeDax/iTHwl8si/qaiTcrw==, figureFileBig=FE1kMlXIkiqd2KHnjJZHBA==, tableContent=null), ArticleFig(id=1251480552176567160, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.3, caption=
DMA asynchronous insertion, figureFileSmall=CXBd1Q2vYkoKdPmPUwwV8w==, figureFileBig=u2oXs5rSitKfX0Q43ze4Zw==, tableContent=null), ArticleFig(id=1251480552314979204, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图3, caption=
DMA异步插入, figureFileSmall=CXBd1Q2vYkoKdPmPUwwV8w==, figureFileBig=u2oXs5rSitKfX0Q43ze4Zw==, tableContent=null), ArticleFig(id=1251480552398865289, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Alg.3, caption=
DMA asynchronous insertion, figureFileSmall=xPuCEEG9+A+z5ckdwjsuPw==, figureFileBig=rvOBgAOCuuk8b1Z9rkPr0w==, tableContent=null), ArticleFig(id=1251480552512111507, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=算法3, caption=
DMA异步插入, figureFileSmall=xPuCEEG9+A+z5ckdwjsuPw==, figureFileBig=rvOBgAOCuuk8b1Z9rkPr0w==, tableContent=null), ArticleFig(id=1251480552633746332, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.4, caption=
DMA insertion in control flow graph, figureFileSmall=hPzsjrjxHgauCuHvbAPxLA==, figureFileBig=hCiNqZHOabKKpWcljk3DIQ==, tableContent=null), ArticleFig(id=1251480552751186855, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图4, caption=
控制流计算图内执行DMA插入, figureFileSmall=hPzsjrjxHgauCuHvbAPxLA==, figureFileBig=hCiNqZHOabKKpWcljk3DIQ==, tableContent=null), ArticleFig(id=1251480552877015985, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.5, caption=
Optimization effect comparison, figureFileSmall=KPVO2+4S7NBKXtfDhUFAaA==, figureFileBig=cOq5KAcp91iW5sXTMfnZmQ==, tableContent=null), ArticleFig(id=1251480552965096380, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图5, caption=
优化效果对比, figureFileSmall=KPVO2+4S7NBKXtfDhUFAaA==, figureFileBig=cOq5KAcp91iW5sXTMfnZmQ==, tableContent=null), ArticleFig(id=1251480553061565380, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Fig.6, caption=
FusionDetect memory allocation, figureFileSmall=GrVhPtGzAgu9b6YZS38nkg==, figureFileBig=8/SoDHLha9q4mXgLargoAA==, tableContent=null), ArticleFig(id=1251480553179005899, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=图6, caption=
FusionDetect内存分配, figureFileSmall=GrVhPtGzAgu9b6YZS38nkg==, figureFileBig=8/SoDHLha9q4mXgLargoAA==, tableContent=null), ArticleFig(id=1251480553304835030, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Tab.1, caption=
Effect of different models using the algorithm before optimization
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 原始内存 | 大张量优先复用算法 | 短张量优先复用算法 | 顺序复用算法 |
|---|---|---|---|---|
| FaceDetect | 934000 | 409600 | 409600 | 440320 |
| FaceAlign | 202429 | 46080 | 46668 | 46668 |
| 唤醒 | 312664 | 24152 | 24152 | 24152 |
| FusionDetect | 26502400 | 489600 | 489600 | 604800 |
| 降噪 | 794128 | 99840 | 117760 | 142848 |
), ArticleFig(id=1251480553422275552, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=表1, caption=
不同模型优化前的效果
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 原始内存 | 大张量优先复用算法 | 短张量优先复用算法 | 顺序复用算法 |
|---|---|---|---|---|
| FaceDetect | 934000 | 409600 | 409600 | 440320 |
| FaceAlign | 202429 | 46080 | 46668 | 46668 |
| 唤醒 | 312664 | 24152 | 24152 | 24152 |
| FusionDetect | 26502400 | 489600 | 489600 | 604800 |
| 降噪 | 794128 | 99840 | 117760 | 142848 |
), ArticleFig(id=1251480553560687599, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Tab.2, caption=
Optimization effect of different models using control flow optimization algorithm
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 大张量优先复用算法 | 短张量优先复用算法 | 顺序复用算法 |
|---|---|---|---|
| 唤醒 | 24152 | 24152 | 24152 |
| FusionDetect | 460800 | 460800 | 576000 |
| 降噪 | 98340 | 116224 | 141312 |
), ArticleFig(id=1251480553631990775, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=表2, caption=
不同模型使用控制流优化算法的优化效果
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 大张量优先复用算法 | 短张量优先复用算法 | 顺序复用算法 |
|---|---|---|---|
| 唤醒 | 24152 | 24152 | 24152 |
| FusionDetect | 460800 | 460800 | 576000 |
| 降噪 | 98340 | 116224 | 141312 |
), ArticleFig(id=1251480553732654078, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=EN, label=Tab.3, caption=
Model asynchronous insertion DMA optimization results
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 原始时间/ms | 异步优化后的时间/ms | 优化效率/% |
|---|---|---|---|
| FaceAlign | 2411.414 | 1712.770 | 28.97 |
| FaceDetect | 4978.807 | 4342.279 | 12.78 |
| BodyDetect | 1712.77 | 1702.147 | 0.62 |
| FusionDetect | 6006.455 | 5129.129 | 14.61 |
| 唤醒 | 2229.174 | 1695.734 | 23.93 |
), ArticleFig(id=1251480553858482190, tenantId=1146029695717560320, journalId=1251234078029037663, articleId=1251480540524790125, language=CN, label=表3, caption=
模型异步插入DMA优化结果
, figureFileSmall=null, figureFileBig=null, tableContent=
| 模型 | 原始时间/ms | 异步优化后的时间/ms | 优化效率/% |
|---|---|---|---|
| FaceAlign | 2411.414 | 1712.770 | 28.97 |
| FaceDetect | 4978.807 | 4342.279 | 12.78 |
| BodyDetect | 1712.77 | 1702.147 | 0.62 |
| FusionDetect | 6006.455 | 5129.129 | 14.61 |
| 唤醒 | 2229.174 | 1695.734 | 23.93 |
)], attaches=null, journal=Journal(id=1251231494090305632, delFlag=0, nameCn=国防科技大学学报, nameEn=Journal of National Niversity of Defense Technology, nameHistory1=null, nameHistory2=null, issn=1001-2486, eissn=, cn=43-1067/T, coden=null, periodic=双月刊, language=CN, oaType=1, ccby=null, superviseOffice=null, ownerOffice=null, pubOffice=null, editorOffice=null, officeType=null, aims=null, clcCode=null, officeProv=null, officeCity=null, officeAddr=null, officeZip=null, officeEmail=, officePhone=, editDirector=null, officeDirector=null, officeDirectorPhone=null, officeStaffNum=null, officeEmpNum=null, coverPicUrl=h+HgOUssQ5XqPoD980XNIA==, journalPrice=null, startedYear=null, abbrevIsoEn=Journal of National Niversity of Defense Technology, journalRemark=null, publicationField=null, createdTime=1776246434950, updatedTime=1776251967711, createdBy=18614031015, updatedBy=13701087609, firstLetterCn=J, firstLetterEn=J, subjectCode=Engineering, subjectName=工程, subjectCodeEn=Engineering, subjectNameEn=null, picCn=h+HgOUssQ5XqPoD980XNIA==, picEn=hJx8onaXftcX9VtGkHdjDA==, jcr=null, cjcr=null, exts=[JournalExt(id=1251254700306285546, language=CN, name=国防科技大学学报, nameHistory1=null, nameHistory2=null, managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=, createdTime=1776251967741, updatedTime=1776251967741, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=, submissionAuthorUrl=http://journal.nudt.edu.cn/gfkjdxxb/author/login, submissionEditorUrl=http://journal.nudt.edu.cn/gfkjdxxb/editor/login, submissionReviewUrl=http://journal.nudt.edu.cn/gfkjdxxb/reviewer/login, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""}), JournalExt(id=1251254700356617195, language=EN, name=Journal of National Niversity of Defense Technology, nameHistory1=null, nameHistory2=null, 
managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=, createdTime=1776251967753, updatedTime=1776251967753, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=, submissionAuthorUrl=http://journal.nudt.edu.cn/gfkjdxxb/author/login, submissionEditorUrl=http://journal.nudt.edu.cn/gfkjdxxb/editor/login, submissionReviewUrl=http://journal.nudt.edu.cn/gfkjdxxb/reviewer/login, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""})], databaseList=null, tenantJournalId=1251234078029037663, websiteList=[Website(id=1251257283485843500, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1251234078029037663, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/gfkjdxxb/CN, language=CN, createTime=1776252583619, createBy=18614031015, updateTime=1776253414371, updateBy=18614031015, name=国防科技大学学报-中文, tplId=1146099689490845704, title=国防科技大学学报, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1251260875290653228, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=articleTextType, value=kx, createTime=1776253439972, updateTime=1776253439972, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875273876009, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=banner, value=null, createTime=1776253439968, updateTime=1776253439968, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875311624751, tenantId=1146029695717560320, journalId=null, journalGroupId=null, 
siteId=1251257283485843500, code=grayFlag, value=0, createTime=1776253439977, updateTime=1776253439977, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875261293096, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=logo, value=https://castjournals.cast.org.cn/joweb/gfkjdxxb/CN/file/pic?fileId=WpHzMFTSHy8AuOKzUbYrdw==, createTime=1776253439965, updateTime=1776253439965, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875382927921, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=minRunFlag, value=0, createTime=1776253439994, updateTime=1776253439994, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875286458923, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/gfkjdxxb/CN/file/pic, createTime=1776253439971, updateTime=1776253439971, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875320013360, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=silenceFlag, value=0, createTime=1776253439979, updateTime=1776253439979, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875278070314, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_cn_619/, createTime=1776253439969, updateTime=1776253439969, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875299041837, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283485843500, code=themeColor, value=null, createTime=1776253439974, updateTime=1776253439974, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260875303236142, tenantId=1146029695717560320, journalId=null, journalGroupId=null, 
siteId=1251257283485843500, code=themeStyle, value=null, createTime=1776253439975, updateTime=1776253439975, creator=18614031015, updator=18614031015)]), Website(id=1251257283599089718, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1251234078029037663, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/gfkjdxxb/EN, language=EN, createTime=1776252583646, createBy=18614031015, updateTime=1776253409915, updateBy=18614031015, name=国防科技大学学报-英文, tplId=1146101810881728533, title=Journal of National Niversity of Defense Technology, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1251260846312210678, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=articleTextType, value=kx, createTime=1776253433063, updateTime=1776253433063, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846232518899, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=banner, value=null, createTime=1776253433044, updateTime=1776253433044, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846396096761, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=grayFlag, value=0, createTime=1776253433083, updateTime=1776253433083, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846219935986, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=logo, value=https://castjournals.cast.org.cn/joweb/gfkjdxxb/EN/file/pic?fileId=WpHzMFTSHy8AuOKzUbYrdw==, createTime=1776253433041, updateTime=1776253433041, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846442234107, 
tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=minRunFlag, value=0, createTime=1776253433094, updateTime=1776253433094, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846282850549, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/gfkjdxxb/EN/file/pic, createTime=1776253433056, updateTime=1776253433056, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846417068282, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=silenceFlag, value=0, createTime=1776253433088, updateTime=1776253433088, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846257684724, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_en_623/, createTime=1776253433050, updateTime=1776253433050, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846337376503, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=themeColor, value=null, createTime=1776253433070, updateTime=1776253433070, creator=18614031015, updator=18614031015), WebsiteProps(id=1251260846362542328, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1251257283599089718, code=themeStyle, value=null, createTime=1776253433075, updateTime=1776253433075, creator=18614031015, updator=18614031015)])], journalTitle=国防科技大学学报, weixinUrl=null, journalUrl=http://journal.nudt.edu.cn/, iacademicId=null, status=1, seqNo=null, journalTitleEn=Journal of National Niversity of Defense Technology, journalPhotoCn=h+HgOUssQ5XqPoD980XNIA==, journalPhotoEn=hJx8onaXftcX9VtGkHdjDA==, journalFirstLetter=J, journalRecommend=null, journalNew=null, journalCollection=null, jcrJf=null, cjcrJf=null, 
jcrJfStr=null, cjcrJfStr=null, submissionFirstDecision=null, sciSubjectClassification=null, casSubjectClassification=null, citeScore=null, totalCitationFrequency=null, icpCode=null, psCode=null, advertisingLicenseCode=null, copyrightInformation=null, country=null, option=, provinceCode=null, provinceName=null, collectFlag=false), detailUrlCn=https://castjournals.cast.org.cn/joweb/gfkjdxxb/CN/10.11887/j.issn.1001-2486.25050003, detailUrlEn=https://castjournals.cast.org.cn/joweb/gfkjdxxb/EN/10.11887/j.issn.1001-2486.25050003, pdfUrlCn=https://castjournals.cast.org.cn/joweb/gfkjdxxb/CN/PDF/10.11887/j.issn.1001-2486.25050003, pdfUrlEn=https://castjournals.cast.org.cn/joweb/gfkjdxxb/EN/PDF/10.11887/j.issn.1001-2486.25050003, aliStartDate=null, aliEndDate=null, collectionFlag=false, citedCount=null, citedUrl=null, reference=null)