Article(id=1263514354360660966, tenantId=1146029695717560320, journalId=1263187241531621409, issueId=1263514351571428296, articleNumber=null, orderNo=null, doi=10.11996/JG.j.2095-302X.2026010039, pmid=null, cstr=null, oa=null, hot=null, price=null, onlineType=0, articleFormat=0, articleType=null, articleTypeStr=research-article, receivedDate=1749484800000, receivedDateStr=2025-06-10, revisedDate=null, revisedDateStr=null, acceptedDate=1760112000000, acceptedDateStr=2025-10-11, onlineDate=1779174897041, onlineDateStr=2026-05-19, pubDate=1772208000000, pubDateStr=2026-02-28, doiRegisterDate=null, doiRegisterDateStr=null, onlineIssueDate=1779174897041, onlineIssueDateStr=2026-05-19, onlineJustAcceptDate=null, onlineJustAcceptDateStr=null, onlineFirstDate=null, onlineFirstDateStr=null, sourceXml=null, magXml=null, createTime=1779174897041, creator=13701087609, updateTime=1779174897041, updator=13701087609, issue=Issue{id=1263514351571428296, tenantId=1146029695717560320, journalId=1263187241531621409, year='2026', volume='47', issue='1', pageStart='1', pageEnd='233', issueExtLink='null', onlineDate='null', pubDate='null', beforeIssueId=null, nextIssueId=null, price=null, status=1, issueComplete=1, articleOrder=1, issueType=1, specialIssue=null, createTime=1779174896376, creator=13701087609, updateTime=1779174963943, updator=13701087609, preIssue=null, nextIssue=null, ext={EN=IssueExt(id=1263514635077039012, tenantId=1146029695717560320, journalId=1263187241531621409, issueId=1263514351571428296, language=EN, specialIssueTitle=, coverIllustrator=null, specialIssueEditor=, specialIssueAbout=), CN=IssueExt(id=1263514635077039013, tenantId=1146029695717560320, journalId=1263187241531621409, issueId=1263514351571428296, language=CN, specialIssueTitle=, coverIllustrator=null, specialIssueEditor=, specialIssueAbout=)}, issueFiles=null}, startPage=39, endPage=47, ext={EN=ArticleExt(id=1263514354754925545, articleId=1263514354360660966, tenantId=1146029695717560320, journalId=1263187241531621409, language=EN, title=A mixed-precision quantization method for large language models via memory alignment, columnId=1263514354654262248, journalTitle=Journal of Graphics, columnName=Image Processing and Computer Vision, runingTitle=null, highlight=null, articleAbstract=
As large models continue to grow in scale, the memory footprint and computational overhead of model inference have become critical challenges. Mixed-precision quantization is an effective approach to reduce resource consumption, but existing methods suffer from insufficient outlier handling, significant quantization accuracy loss, and inefficient memory access. To address these issues, a memory-aligned mixed-precision quantization method for large models was proposed. First, weights were divided into SIMD-aligned groups, and outlier groups were identified via group-wise significance analysis, with high-significance groups quantized to 8 bit and others to 2 bit. A block-wise compensation strategy was introduced to mitigate accuracy degradation caused by 2 bit quantization. Furthermore, an efficient packing and storage scheme was designed for mixed-precision weights, where a bitmap was used to record the bit width of each data block, enabling random access. Experimental results demonstrated that the proposed method significantly reduced memory usage and improved computational efficiency while maintaining model accuracy. Specifically, on Llama2-7 B/13 B/70 B, the approach achieved perplexity reductions of 8.13/2.84/1.37 on WikiText-2 and 5.80 on C4 relative to state-of-the-art baselines. The quantized 70 B model reduced weight storage by approximately 87% compared with BF16. Across seven QA benchmarks, an average accuracy gain of 6.24% was achieved. Last, these results indicated that a mixed-precision quantization method for large language models via memory alignment could simultaneously improve compression ratio, memory-access efficiency, and overall model performance.
, correspAuthors=null, authorNote=null, correspAuthorsNote=
, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=null, magXml=null, pdfUrl=null, pdf=null, pdfFileSize=null, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=null, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=null, mapNumber=null, authorCompany=null, fund=null, authors=null, authorsList=Zhangming LI, Weifan GUAN, Zhengwei CHANG, Linghao ZHANG, Qinghao HU), CN=ArticleExt(id=1263514355929330674, articleId=1263514354360660966, tenantId=1146029695717560320, journalId=1263187241531621409, language=CN, title=一种基于内存对齐的大模型混合精度量化方法, columnId=1263514354893337578, journalTitle=图学学报, columnName=图像处理与计算机视觉, runingTitle=null, highlight=null, articleAbstract=
随着大模型规模的不断增长,模型推理的内存占用和计算开销成为重要挑战。模型量化是降低模型资源消耗的有效方法,但现有方法在权重量化过程中存在离群点处理不足、量化精度损失显著以及内存访问效率低下等问题。为此,提出一种内存对齐的大模型混合精度量化方法,通过将模型参数表示成不同位宽的量化参数实现混合精度量化方法,在降低模型存储的同时缓解量化带来的精度损失问题。具体来说,基于小组显著性分析划分权重离群点,将模型参数按单指令多数据流(SIMD)单元对齐分组,并依据显著性对不同小组采用8 bit或2 bit量化;针对2 bit量化可能导致的精度损失,引入分块量化补偿策略。此外,设计了一种高效的混合精度权重打包与存储方案,通过位图(Bitmap)记录数据块位宽类型,支持随机访问。实验结果表明,该方法在保证模型精度的同时,显著降低了内存占用并提升了计算效率。通过在Llama2-7 B,13 B和70 B上进行验证,相比最先进的方法,在WikiText2和C4数据集上的困惑度(PPL)分别下降8.13,2.84,1.37及5.80,并且量化后的70 B模型相对BF16权重存储约减87%。此外在7个QA数据集上平均准确率提升6.24%。其结果表明,基于内存对齐的大模型混合精度量化方法能够同时提升压缩率、访存效率与模型性能。
, correspAuthors=null, authorNote=null, correspAuthorsNote=
, copyrightStatement=null, copyrightOwner=null, extLink=null, articleAbsUrl=null, sourceXml=auhxYIO6AD2ruxD5VfmO2g==, magXml=XFHy99NY9dmK8ZmXjoR5bg==, pdfUrl=null, pdf=h+NWAOuC3DFCMa1ynETK1A==, pdfFileSize=1782865, pdfExtLink=null, richHtmlUrl=null, mobilePdfUrl=null, reviewReport=null, pdfFirstPage=null, abstractGraph=E+iGjTOXd9gl+YhRRduRLA==, abstractGraphContent=null, abstractVideo=null, citation=null, cebUrl=null, magXmlContent=Nx9gonTy5B1ez6UQ7CyDCg==, mapNumber=null, authorCompany=null, fund=null, authors=null, authorsList=李章明, 关伟凡, 常政威, 张凌浩, 胡庆浩)}, authors=[Author(id=1263550811322368646, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, orderNo=0, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1263550811737604745, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550811322368646, language=EN, stringName=Zhangming LI, firstName=Zhangming, middleName=null, lastName=LI, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1263550812144452240, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550811322368646, language=CN, stringName=李章明, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1263550809204245104, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=1, ext=[AuthorCompanyExt(id=1263550809216828017, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China), AuthorCompanyExt(id=1263550809275548275, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190)])]), Author(id=1263550812656157335, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, orderNo=1, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1263550813461463708, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550812656157335, language=EN, stringName=Weifan GUAN, firstName=Weifan, middleName=null, lastName=GUAN, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1263550813935420066, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550812656157335, language=CN, stringName=关伟凡, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1263550809204245104, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=1, ext=[AuthorCompanyExt(id=1263550809216828017, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China), AuthorCompanyExt(id=1263550809275548275, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190)])]), Author(id=1263550815474729639, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, orderNo=2, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1263550816309396142, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550815474729639, language=EN, stringName=Zhengwei CHANG, firstName=Zhengwei, middleName=null, lastName=CHANG, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2 State Grid Sichuan Electric Power Company, Chengdu Sichuan 610041, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1263550816762380982, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550815474729639, language=CN, stringName=常政威, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2 国网四川省电力公司, 四川 成都 610041, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1263550810915521146, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=2, ext=[AuthorCompanyExt(id=1263550810961658492, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 State Grid Sichuan Electric Power Company, Chengdu Sichuan 610041, China), AuthorCompanyExt(id=1263550810978435710, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 国网四川省电力公司, 四川 成都 610041)])]), Author(id=1263550817567687357, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, orderNo=3, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=null, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1263550817995506372, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550817567687357, language=EN, stringName=Linghao ZHANG, firstName=Linghao, middleName=null, lastName=ZHANG, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2 State Grid Sichuan Electric Power Company, Chengdu Sichuan 610041, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1263550820197515979, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550817567687357, language=CN, stringName=张凌浩, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
2, address=
2 国网四川省电力公司, 四川 成都 610041, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1263550810915521146, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=2, ext=[AuthorCompanyExt(id=1263550810961658492, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 State Grid Sichuan Electric Power Company, Chengdu Sichuan 610041, China), AuthorCompanyExt(id=1263550810978435710, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 国网四川省电力公司, 四川 成都 610041)])]), Author(id=1263550820931519189, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, orderNo=4, firstName=null, middleName=null, lastName=null, nameCn=null, orcid=null, stid=null, country=null, authorPic=null, dead=0, email=huqinghao2014@ia.ac.cn, emailSecond=null, emailThird=null, correspondingAuthor=0, authorType=1, ext={EN=AuthorExt(id=1263550821405475553, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550820931519189, language=EN, stringName=Qinghao HU, firstName=Qinghao, middleName=null, lastName=HU, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null), CN=AuthorExt(id=1263550822013649642, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, authorId=1263550820931519189, language=CN, stringName=胡庆浩, firstName=null, middleName=null, lastName=null, prefix=null, suffix=null, authorComment=null, nameInitials=null, affiliation=null, department=null, xref=
1, address=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190, bio=null, bioImg=null, bioContent=null, aboutCorrespAuthor=null)}, companyList=[AuthorCompany(id=1263550809204245104, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=1, ext=[AuthorCompanyExt(id=1263550809216828017, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China), AuthorCompanyExt(id=1263550809275548275, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190)])])], keywords=[Keyword(id=1263550824471511807, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, orderNo=1, keyword=large language model compression), Keyword(id=1263550825226486538, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, orderNo=2, keyword=post-training quantization), Keyword(id=1263550825620751126, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, orderNo=3, keyword=low-bit quantization), Keyword(id=1263550826086318880, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, orderNo=4, keyword=mixed-precision quantization), Keyword(id=1263550827738874673, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, orderNo=5, keyword=outlier extraction), Keyword(id=1263550829802472253, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, orderNo=1, keyword=大模型压缩), Keyword(id=1263550830570029895, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, orderNo=2, keyword=训练后量化), Keyword(id=1263550831606022993, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, orderNo=3, keyword=低比特量化), Keyword(id=1263550833241801561, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, orderNo=4, keyword=混合精度量化), Keyword(id=1263550833984193377, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, orderNo=5, keyword=离群点划分)], refs=[Reference(id=1263550861284918263, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2023, volume=null, issue=null, pageStart=3, pageEnd=null, url=null, language=null, rfNumber=[1], rfOrder=0, authorNames=GUO C, TANG J M, HU W M, journalName=The 50th Annual International Symposium on Computer Architecture, refType=null, unstructuredReference=
GUO C,
TANG J M,
HU W M,
et al. OliVe: accelerating large language models via hardware-friendly outlier-victim pair quantization[C]//
The 50th Annual International Symposium on Computer Architecture. New York: ACM,
2023: 3., articleTitle=OliVe: accelerating large language models via hardware-friendly outlier-victim pair quantization, refAbstract=null), Reference(id=1263550861695960064, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2310.00034, language=null, rfNumber=[2], rfOrder=1, authorNames=SHANG Y Z, YUAN Z H, WU Q, journalName=null, refType=null, unstructuredReference=
SHANG Y Z,
YUAN Z H,
WU Q,
et al. PB-LLM: partially binarized large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2310.00034., articleTitle=PB-LLM: partially binarized large language models, refAbstract=null), Reference(id=1263550862463516679, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2402.04291, language=null, rfNumber=[3], rfOrder=2, authorNames=HUANG W, LIU Y D, QIN H T, journalName=null, refType=null, unstructuredReference=
HUANG W,
LIU Y D,
QIN H T,
et al. BiLLM: pushing the limit of post-training quantization for LLMs[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2402.04291., articleTitle=BiLLM: pushing the limit of post-training quantization for LLMs, refAbstract=null), Reference(id=1263550862673231885, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2301.00774, language=null, rfNumber=[4], rfOrder=3, authorNames=FRANTAR E, ALISTARH D, journalName=null, refType=null, unstructuredReference=
FRANTAR E,
ALISTARH D. SparseGPT: massive language models can be accurately pruned in one-shot[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2301.00774., articleTitle=SparseGPT: massive language models can be accurately pruned in one-shot, refAbstract=null), Reference(id=1263550864854269972, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2306.11695, language=null, rfNumber=[5], rfOrder=4, authorNames=SUN M J, LIU Z, BAIR A, journalName=null, refType=null, unstructuredReference=
SUN M J,
LIU Z,
BAIR A,
et al. A simple and effective pruning approach for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2306.11695., articleTitle=A simple and effective pruning approach for large language models, refAbstract=null), Reference(id=1263550865416306716, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2306.08543, language=null, rfNumber=[6], rfOrder=5, authorNames=GU Y X, DONG L, WEI F R, journalName=null, refType=null, unstructuredReference=
GU Y X,
DONG L,
WEI F R,
et al. MiniLLM: knowledge distillation of large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2306.08543., articleTitle=MiniLLM: knowledge distillation of large language models, refAbstract=null), Reference(id=1263550866108366884, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2016, volume=null, issue=null, pageStart=26, pageEnd=35, url=null, language=null, rfNumber=[7], rfOrder=6, authorNames=QIU J T, WANG J, YAO S, journalName=2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, refType=null, unstructuredReference=
QIU J T,
WANG J,
YAO S,
et al. Going deeper with embedded FPGA platform for convolutional neural network[C]//
2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays. New York: ACM,
2016: 26-35., articleTitle=Going deeper with embedded FPGA platform for convolutional neural network, refAbstract=null), Reference(id=1263550866750095404, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/1511.06393, language=null, rfNumber=[8], rfOrder=7, authorNames=LIN D D, TALATHI S S, ANNAPUREDDY V S, journalName=null, refType=null, unstructuredReference=
LIN D D,
TALATHI S S,
ANNAPUREDDY V S. Fixed point quantization of deep convolutional networks[EB/OL]. [2025-04-10]. https://arxiv.org/abs/1511.06393., articleTitle=Fixed point quantization of deep convolutional networks, refAbstract=null), Reference(id=1263550867232440374, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/1511.00363, language=null, rfNumber=[9], rfOrder=8, authorNames=COURBARIAUX M, BENGIO Y, DAVID J P, journalName=null, refType=null, unstructuredReference=
COURBARIAUX M,
BENGIO Y,
DAVID J P. BinaryConnect: training deep neural networks with binary weights during propagations[EB/OL]. [2025-04-10]. https://arxiv.org/abs/1511.00363., articleTitle=BinaryConnect: training deep neural networks with binary weights during propagations, refAbstract=null), Reference(id=1263550869459615808, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=467, pageEnd=484, url=null, language=null, rfNumber=[10], rfOrder=9, authorNames=LIU Z C, OGUZ B, ZHAO C S, journalName=Findings of the Association for Computational Linguistics, refType=null, unstructuredReference=
LIU Z C,
OGUZ B,
ZHAO C S,
et al. LLM-QAT: data-free quantization aware training for large language models[C]//
Findings of the Association for Computational Linguistics. New York: ACL,
2024: 467-484., articleTitle=LLM-QAT: data-free quantization aware training for large language models, refAbstract=null), Reference(id=1263550870004875332, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2210.17323, language=null, rfNumber=[11], rfOrder=10, authorNames=FRANTAR E, ASHKBOOS S, HOEFLER T, journalName=null, refType=null, unstructuredReference=
FRANTAR E,
ASHKBOOS S,
HOEFLER T,
et al. GPTQ: accurate post-training quantization for generative pre-trained transformers[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2210.17323., articleTitle=GPTQ: accurate post-training quantization for generative pre-trained transformers, refAbstract=null), Reference(id=1263550870348808268, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2025, volume=null, issue=null, pageStart=7708, pageEnd=7743, url=null, language=null, rfNumber=[12], rfOrder=11, authorNames=LEE J H, KIM J, YANG J Y, journalName=2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics:Human Language Technologies, refType=null, unstructuredReference=
LEE J H,
KIM J,
YANG J Y,
et al. LRQ: optimizing post-training quantization for large language models by learning low-rank weight-scaling matrices[C]//
2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics:Human Language Technologies. New York: ACL,
2025: 7708-7743., articleTitle=LRQ: optimizing post-training quantization for large language models by learning low-rank weight-scaling matrices, refAbstract=null), Reference(id=1263550870604660820, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2505.07004, language=null, rfNumber=[13], rfOrder=12, authorNames=KIM J, EL HALABI M, PARK W, journalName=null, refType=null, unstructuredReference=
KIM J,
EL HALABI M,
PARK W,
et al. GuidedQuant: large language model quantization via exploiting end loss guidance[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2505.07004., articleTitle=GuidedQuant: large language model quantization via exploiting end loss guidance, refAbstract=null), Reference(id=1263550870810181720, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2208.07339, language=null, rfNumber=[14], rfOrder=13, authorNames=DETTMERS T, LEWIS M, BELKADA Y, journalName=null, refType=null, unstructuredReference=
DETTMERS T,
LEWIS M,
BELKADA Y,
et al. LLM.int8(): 8-bit matrix multiplication for transformers at scale[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2208.07339., articleTitle=LLM.int8(): 8-bit matrix multiplication for transformers at scale, refAbstract=null), Reference(id=1263550870990536798, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2206.01861, language=null, rfNumber=[15], rfOrder=14, authorNames=YAO Z W, AMINABADI R Y, ZHANG M J, journalName=null, refType=null, unstructuredReference=
YAO Z W,
AMINABADI R Y,
ZHANG M J,
et al. ZeroQuant: efficient and affordable post-training quantization for large-scale transformers[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2206.01861., articleTitle=ZeroQuant: efficient and affordable post-training quantization for large-scale transformers, refAbstract=null), Reference(id=1263550871196057700, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2211.10438, language=null, rfNumber=[16], rfOrder=15, authorNames=XIAO G X, LIN J, SEZNEC M, journalName=null, refType=null, unstructuredReference=
XIAO G X,
LIN J,
SEZNEC M,
et al. SmoothQuant: accurate and efficient post-training quantization for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2211.10438., articleTitle=SmoothQuant: accurate and efficient post-training quantization for large language models, refAbstract=null), Reference(id=1263550871481270380, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2308.13137, language=null, rfNumber=[17], rfOrder=16, authorNames=SHAO W Q, CHEN M Z, ZHANG Z Y, journalName=null, refType=null, unstructuredReference=
SHAO W Q,
CHEN M Z,
ZHANG Z Y,
et al. OmniQuant: omnidirectionally calibrated quantization for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2308.13137., articleTitle=OmniQuant: omnidirectionally calibrated quantization for large language models, refAbstract=null), Reference(id=1263550871657431153, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2303.08302, language=null, rfNumber=[18], rfOrder=17, authorNames=YAO Z W, WU X X, LI C, journalName=null, refType=null, unstructuredReference=
YAO Z W,
WU X X,
LI C,
et al. ZeroQuant-V2:exploring post-training quantization in LLMs from comprehensive study to low rank compensation[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2303.08302., articleTitle=ZeroQuant-V2:exploring post-training quantization in LLMs from comprehensive study to low rank compensation, refAbstract=null), Reference(id=1263550871879729271, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2306.00978, language=null, rfNumber=[19], rfOrder=18, authorNames=LIN J, TANG J M, TANG H T, journalName=null, refType=null, unstructuredReference=
LIN J,
TANG J M,
TANG H T,
et al. AWQ: activation-aware weight quantization for LLM compression and acceleration[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2306.00978., articleTitle=AWQ: activation-aware weight quantization for LLM compression and acceleration, refAbstract=null), Reference(id=1263550873666502780, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2306.03078, language=null, rfNumber=[20], rfOrder=19, authorNames=DETTMERS T, SVIRSCHEVSKI R, EGIAZARIAN V, journalName=null, refType=null, unstructuredReference=
DETTMERS T,
SVIRSCHEVSKI R,
EGIAZARIAN V,
et al. SpQR: a sparse-quantized representation for near-lossless LLM weight compression[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2306.03078., articleTitle=SpQR: a sparse-quantized representation for near-lossless LLM weight compression, refAbstract=null), Reference(id=1263550873867829382, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2307.13304, language=null, rfNumber=[21], rfOrder=20, authorNames=CHEE J, CAI Y H, KULESHOV V, journalName=null, refType=null, unstructuredReference=
CHEE J,
CAI Y H,
KULESHOV V,
et al. QuIP:2-bit quantization of large language models with guarantees[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2307.13304., articleTitle=QuIP:2-bit quantization of large language models with guarantees, refAbstract=null), Reference(id=1263550874073350282, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2024, volume=null, issue=null, pageStart=18536, pageEnd=18544, url=null, language=null, rfNumber=[22], rfOrder=21, authorNames=LI L, LI Q Y, ZHANG B, journalName=The 38th AAAI Conference on Artificial Intelligence, refType=null, unstructuredReference=
LI L,
LI Q Y,
ZHANG B,
et al. Norm tweaking: High-performance low-bit quantization of large language models[C]//
The 38th AAAI Conference on Artificial Intelligence. Philadelphia: AAAI,
2024: 18536-18544., articleTitle=Norm tweaking: High-performance low-bit quantization of large language models, refAbstract=null), Reference(id=1263550874320814221, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2309.01885, language=null, rfNumber=[23], rfOrder=22, authorNames=BEHDIN K, ACHARYA A, GUPTA A, journalName=null, refType=null, unstructuredReference=
BEHDIN K,
ACHARYA A,
GUPTA A,
et al. QuantEase: optimization-based quantization for language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2309.01885., articleTitle=QuantEase: optimization-based quantization for language models, refAbstract=null), Reference(id=1263550874509557908, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2304.01089, language=null, rfNumber=[24], rfOrder=23, authorNames=YUAN Z H, NIU L, LIU J W, journalName=null, refType=null, unstructuredReference=
YUAN Z H,
NIU L,
LIU J W,
et al. RPTQ: reorder-based post-training quantization for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2304.01089., articleTitle=RPTQ: reorder-based post-training quantization for large language models, refAbstract=null), Reference(id=1263550874622804117, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/1609.07843, language=null, rfNumber=[25], rfOrder=24, authorNames=MERITY S, XIONG C M, BRADBURY J, journalName=null, refType=null, unstructuredReference=
MERITY S,
XIONG C M,
BRADBURY J,
et al. Pointer sentinel mixture models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/1609.07843., articleTitle=Pointer sentinel mixture models, refAbstract=null), Reference(id=1263550874828325019, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2020, volume=21, issue=1, pageStart=140, pageEnd=null, url=null, language=null, rfNumber=[26], rfOrder=25, authorNames=RAFFEL C, SHAZEER N, ROBERTS A, journalName=The Journal of Machine Learning Research, refType=null, unstructuredReference=
RAFFEL C,
SHAZEER N,
ROBERTS A,
et al. Exploring the limits of transfer learning with a unified text-to-text transformer[J].
The Journal of Machine Learning Research,
2020,
21(1): 140., articleTitle=Exploring the limits of transfer learning with a unified text-to-text transformer, refAbstract=null), Reference(id=1263550875096760480, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2020, volume=null, issue=null, pageStart=7432, pageEnd=7439, url=null, language=null, rfNumber=[27], rfOrder=26, authorNames=BISK Y, ZELLERS R, LE BRAS R, journalName=The 34th AAAI Conference on Artificial Intelligence, refType=null, unstructuredReference=
BISK Y,
ZELLERS R,
LE BRAS R,
et al. PIQA: reasoning about physical commonsense in natural language[C]//
The 34th AAAI Conference on Artificial Intelligence. Philadelphia: AAAI,
2020: 7432-7439., articleTitle=PIQA: reasoning about physical commonsense in natural language, refAbstract=null), Reference(id=1263550875184840870, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=2924, pageEnd=2936, url=null, language=null, rfNumber=[28], rfOrder=27, authorNames=CLARK C, LEE K, CHANG M W, journalName=2019 Conference of the North American Chapter of the Association for Computational Linguistics:Human Language Technologies, refType=null, unstructuredReference=
CLARK C,
LEE K,
CHANG M W,
et al. BoolQ: exploring the surprising difficulty of natural yes/no questions[C]//
2019 Conference of the North American Chapter of the Association for Computational Linguistics:Human Language Technologies. New York: ACL,
2019: 2924-2936., articleTitle=BoolQ: exploring the surprising difficulty of natural yes/no questions, refAbstract=null), Reference(id=1263550875340030122, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2018, volume=null, issue=null, pageStart=2381, pageEnd=2391, url=null, language=null, rfNumber=[29], rfOrder=28, authorNames=MIHAYLOV T, CLARK P, KHOT T, journalName=2018 Conference on Empirical Methods in Natural Language Processing, refType=null, unstructuredReference=
MIHAYLOV T,
CLARK P,
KHOT T,
et al. Can a suit of armor conduct electricity? A new dataset for open book question answering[C]//
2018 Conference on Empirical Methods in Natural Language Processing. New York: ACL,
2018: 2381-2391., articleTitle=Can a suit of armor conduct electricity? A new dataset for open book question answering, refAbstract=null), Reference(id=1263550875465859246, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2021, volume=64, issue=9, pageStart=99, pageEnd=106, url=null, language=null, rfNumber=[30], rfOrder=29, authorNames=SAKAGUCHI K, LE BRAS R, BHAGAVATULA C, journalName=Communications of the ACM, refType=null, unstructuredReference=
SAKAGUCHI K,
LE BRAS R,
BHAGAVATULA C,
et al. WinoGrande: an adversarial winograd schema challenge at scale[J].
Communications of the ACM,
2021,
64(9): 99-106., articleTitle=WinoGrande: an adversarial winograd schema challenge at scale, refAbstract=null), Reference(id=1263550875587494067, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=2019, volume=null, issue=null, pageStart=4791, pageEnd=4800, url=null, language=null, rfNumber=[31], rfOrder=30, authorNames=ZELLERS R, HOLTZMAN A, BISK Y, journalName=The 57th Annual Meeting of the Association for Computational Linguistics, refType=null, unstructuredReference=
ZELLERS R,
HOLTZMAN A,
BISK Y,
et al. HellaSwag: can a machine really finish your sentence?[C]//
The 57th Annual Meeting of the Association for Computational Linguistics. New York: ACL,
2019: 4791-4800., articleTitle=HellaSwag: can a machine really finish your sentence?, refAbstract=null), Reference(id=1263550875730100410, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/1803.05457, language=null, rfNumber=[32], rfOrder=31, authorNames=CLARK P, COWHEY I, ETZIONI O, journalName=null, refType=null, unstructuredReference=
CLARK P,
COWHEY I,
ETZIONI O,
et al. Think you have solved question answering? try ARC, the AI2 reasoning challenge[EB/OL]. [2025-04-10]. https://arxiv.org/abs/1803.05457., articleTitle=Think you have solved question answering? try ARC, the AI2 reasoning challenge, refAbstract=null), Reference(id=1263550875843346621, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/pdf/1806.08342, language=null, rfNumber=[33], rfOrder=32, authorNames=KRISHNAMOORTHI R, journalName=null, refType=null, unstructuredReference=
KRISHNAMOORTHI R. Quantizing deep convolutional networks for efficient inference: a whitepaper[EB/OL]. [2025-04-10]. https://arxiv.org/pdf/1806.08342., articleTitle=Quantizing deep convolutional networks for efficient inference: a whitepaper, refAbstract=null), Reference(id=1263550876216639682, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2410.03129, language=null, rfNumber=[34], rfOrder=33, authorNames=LI Z T, YAN X L, ZHANG T N, journalName=null, refType=null, unstructuredReference=
LI Z T,
YAN X L,
ZHANG T N,
et al. ARB-LLM: alternating refined binarizations for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2410.03129., articleTitle=ARB-LLM: alternating refined binarizations for large language models, refAbstract=null), Reference(id=1263550876640264393, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, doi=null, pmid=null, pmcid=null, year=null, volume=null, issue=null, pageStart=null, pageEnd=null, url=https://arxiv.org/abs/2405.14917, language=null, rfNumber=[35], rfOrder=34, authorNames=HUANG W, QIN H T, LIU Y D, journalName=null, refType=null, unstructuredReference=
HUANG W,
QIN H T,
LIU Y D,
et al. SliM-LLM: salience-driven mixed-precision quantization for large language models[EB/OL]. [2025-04-10]. https://arxiv.org/abs/2405.14917., articleTitle=SliM-LLM: salience-driven mixed-precision quantization for large language models, refAbstract=null)], funds=[Fund(id=1263550858252436455, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, awardId=5700-202426249A-1-1-ZN, language=EN, fundingSource=Science and Technology Project of State Grid Corporation of China(5700-202426249A-1-1-ZN), fundOrder=null, country=null), Fund(id=1263550860311839726, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, awardId=5700-202426249A-1-1-ZN, language=CN, fundingSource=国家电网有限公司科技项目(5700-202426249A-1-1-ZN), fundOrder=null, country=null)], companyList=[AuthorCompany(id=1263550809204245104, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=1, ext=[AuthorCompanyExt(id=1263550809216828017, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 The Key Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China), AuthorCompanyExt(id=1263550809275548275, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550809204245104, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
1 中国科学院自动化研究所复杂系统认知与决策重点实验室, 北京 100190)]), AuthorCompany(id=1263550810915521146, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, xref=2, ext=[AuthorCompanyExt(id=1263550810961658492, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=EN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 State Grid Sichuan Electric Power Company, Chengdu Sichuan 610041, China), AuthorCompanyExt(id=1263550810978435710, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, companyId=1263550810915521146, language=CN, country=null, province=null, city=null, postcode=null, companyName=null, departmentName=null, remark=
2 国网四川省电力公司, 四川 成都 610041)])], figs=[ArticleFig(id=1263550838962832259, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Fig. 1, caption=
Schematic diagram of outlier parameter grouping algorithm based on second-order information, figureFileSmall=B3KjPS84BE/fepjJUqBgVg==, figureFileBig=MoWPk33MVX1gLDCvrXQxow==, tableContent=null), ArticleFig(id=1263550839797498758, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=图1, caption=
基于二阶信息的离群参数分组算法示意图, figureFileSmall=B3KjPS84BE/fepjJUqBgVg==, figureFileBig=MoWPk33MVX1gLDCvrXQxow==, tableContent=null), ArticleFig(id=1263550842066617238, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Fig. 2, caption=
Illustration of block-wise quantization strategy, figureFileSmall=LUiByIw+vUo3HlvgjZdzZQ==, figureFileBig=61fHI8NI789LR3XRvkUx+Q==, tableContent=null), ArticleFig(id=1263550843593343903, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=图2, caption=
分块量化策略示意图, figureFileSmall=LUiByIw+vUo3HlvgjZdzZQ==, figureFileBig=61fHI8NI789LR3XRvkUx+Q==, tableContent=null), ArticleFig(id=1263550844482536356, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Fig. 3, caption=
Visual comparison results of quantized parameters ((a) BiLLM; (b) Ours), figureFileSmall=fpVgkRx2aODeU/Mdy0x02Q==, figureFileBig=vlPH65GnNLYnhPeR29JJWA==, tableContent=null), ArticleFig(id=1263550845799547820, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=图3, caption=
量化后参数的可视化对比结果, figureFileSmall=fpVgkRx2aODeU/Mdy0x02Q==, figureFileBig=vlPH65GnNLYnhPeR29JJWA==, tableContent=null), ArticleFig(id=1263550847527601071, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Table 1, caption=
The perplexity of different quantization methods on WikiText2
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 位宽 | 7 B | 13 B | 70 B |
| BF16 | 16.00 | 5.47 | 4.88 | 3.32 |
| RTN | 2.00 | 17 788.00 | 51 145.00 | 26 066.00 |
| GPTQ | 2.00 | 60.45 | 19.70 | 9.12 |
| QuIP | 2.00 | 39.73 | 13.48 | 6.64 |
| PBLLM | 1.70 | 69.20 | 151.09 | 28.37 |
| BILLM | 2.08 | 32.48 | 16.77 | 8.41 |
| ARB-LLM | 2.08 | 16.44 | 11.85 | 6.16 |
| SliM-LLM | 2.16 | 16.01 | 9.41 | 6.28 |
| Ours (K=50) | 2.14 | 25.44 | 13.82 | 7.56 |
| Ours (K=100) | 2.14 | 7.88 | 6.57 | 4.79 |
), ArticleFig(id=1263550848236438449, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=表1, caption=
不同量化方法在WikiText2数据集上的困惑度
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 位宽 | 7 B | 13 B | 70 B |
| BF16 | 16.00 | 5.47 | 4.88 | 3.32 |
| RTN | 2.00 | 17 788.00 | 51 145.00 | 26 066.00 |
| GPTQ | 2.00 | 60.45 | 19.70 | 9.12 |
| QuIP | 2.00 | 39.73 | 13.48 | 6.64 |
| PBLLM | 1.70 | 69.20 | 151.09 | 28.37 |
| BILLM | 2.08 | 32.48 | 16.77 | 8.41 |
| ARB-LLM | 2.08 | 16.44 | 11.85 | 6.16 |
| SliM-LLM | 2.16 | 16.01 | 9.41 | 6.28 |
| Ours (K=50) | 2.14 | 25.44 | 13.82 | 7.56 |
| Ours (K=100) | 2.14 | 7.88 | 6.57 | 4.79 |
), ArticleFig(id=1263550849113047991, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Table 2, caption=
The perplexity of different quantization methods on C4
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 分块 | 位宽 | 困惑(PPL↓) |
| BF16 | - | 16.00 | 6.97 |
| GPTQ | 128 | 2.00 | 43.24 |
| QuIP | 128 | 2.00 | 31.94 |
| PBLLM | 128 | 1.70 | 80.15 |
| BILLM | 128 | 2.08 | 40.52 |
| ARB-LLM | 128 | 2.08 | 20.12 |
| SliM-LLM | 128 | 2.16 | 16.00 |
| Ours (K=50) | 128 | 2.14 | 21.10 |
| Ours (K=100) | 128 | 2.14 | 10.20 |
), ArticleFig(id=1263550849993851837, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=表2, caption=
不同量化方法在C4数据集上的困惑度对比
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 分块 | 位宽 | 困惑(PPL↓) |
| BF16 | - | 16.00 | 6.97 |
| GPTQ | 128 | 2.00 | 43.24 |
| QuIP | 128 | 2.00 | 31.94 |
| PBLLM | 128 | 1.70 | 80.15 |
| BILLM | 128 | 2.08 | 40.52 |
| ARB-LLM | 128 | 2.08 | 20.12 |
| SliM-LLM | 128 | 2.16 | 16.00 |
| Ours (K=50) | 128 | 2.14 | 21.10 |
| Ours (K=100) | 128 | 2.14 | 10.20 |
), ArticleFig(id=1263550852036477893, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Table 3, caption=
Accuracy of different quantization methods on the seven zero-shot QA datasets
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 位宽 | PIQA↑ | BoolQ↑ | OBQA↑ | Winogrande↑ | ARC-e↑ | ARC-c↑ | Hellaswag↑ | Average↑ |
| BILLM | 2.08 | 60.39 | 59.42 | 29.80 | 51.93 | 39.98 | 23.72 | 35.90 | 43.02 |
| ARB-LLM | 2.08 | 66.59 | 66.33 | 29.60 | 57.85 | 51.01 | 27.56 | 48.33 | 49.61 |
| SliM-LLM | 2.16 | 53.64 | 52.59 | 15.00 | 47.98 | 25.08 | 21.50 | 26.29 | 34.58 |
| Ours (K=50) | 2.14 | 68.66 | 63.27 | 21.00 | 58.09 | 50.00 | 26.19 | 40.01 | 46.75 |
| Ours (K=100) | 2.14 | 75.41 | 69.20 | 26.40 | 66.77 | 67.42 | 35.92 | 49.80 | 55.85 |
), ArticleFig(id=1263550852959224780, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=表3, caption=
不同量化方法在7个zero-shot QA数据集上的准确性
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 位宽 | PIQA↑ | BoolQ↑ | OBQA↑ | Winogrande↑ | ARC-e↑ | ARC-c↑ | Hellaswag↑ | Average↑ |
| BILLM | 2.08 | 60.39 | 59.42 | 29.80 | 51.93 | 39.98 | 23.72 | 35.90 | 43.02 |
| ARB-LLM | 2.08 | 66.59 | 66.33 | 29.60 | 57.85 | 51.01 | 27.56 | 48.33 | 49.61 |
| SliM-LLM | 2.16 | 53.64 | 52.59 | 15.00 | 47.98 | 25.08 | 21.50 | 26.29 | 34.58 |
| Ours (K=50) | 2.14 | 68.66 | 63.27 | 21.00 | 58.09 | 50.00 | 26.19 | 40.01 | 46.75 |
| Ours (K=100) | 2.14 | 75.41 | 69.20 | 26.40 | 66.77 | 67.42 | 35.92 | 49.80 | 55.85 |
), ArticleFig(id=1263550853777114066, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=EN, label=Table 4, caption=
Ablation study of our method on WikiText2
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 分组 | 混合精度 | PPL (↓) |
| 本文-G | | √ | 43.38 |
| 本文-M | √ | | 143.15 |
| 本文 | √ | √ | 25.44 |
), ArticleFig(id=1263550856201421786, tenantId=1146029695717560320, journalId=1263187241531621409, articleId=1263514354360660966, language=CN, label=表4, caption=
本方法在WikiText2数据集上的消融研究
, figureFileSmall=null, figureFileBig=null, tableContent=
| 方法 | 分组 | 混合精度 | PPL (↓) |
| 本文-G | | √ | 43.38 |
| 本文-M | √ | | 143.15 |
| 本文 | √ | √ | 25.44 |
)], attaches=null, journal=Journal(id=1263185177418154013, delFlag=0, nameCn=图学学报, nameEn=Journal of Graphics, nameHistory1=null, nameHistory2=null, issn=2095-302X, eissn=null, cn=10-1034/T, coden=null, periodic=1, language=CN, oaType=null, ccby=null, superviseOffice=null, ownerOffice=null, pubOffice=null, editorOffice=null, officeType=null, aims=null, clcCode=null, officeProv=null, officeCity=null, officeAddr=null, officeZip=null, officeEmail=null, officePhone=null, editDirector=null, officeDirector=null, officeDirectorPhone=null, officeStaffNum=null, officeEmpNum=null, coverPicUrl=o2S94GfCLjDYYJqsdQm5Mw==, journalPrice=null, startedYear=null, abbrevIsoEn=Journal of Graphics, journalRemark=null, publicationField=null, createdTime=1779096415144, updatedTime=1779097332356, createdBy=18614031015, updatedBy=13701087609, firstLetterCn=J, firstLetterEn=J, subjectCode=Natural Sciences, subjectName=null, subjectCodeEn=Natural Sciences, subjectNameEn=null, picCn=o2S94GfCLjDYYJqsdQm5Mw==, picEn=f7IBTa1ZXKPoIhMUwL1eVw==, jcr=null, cjcr=null, exts=[JournalExt(id=1263189024693469540, language=CN, name=图学学报, nameHistory1=null, nameHistory2=null, managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=, createdTime=1779097332404, updatedTime=1779097332404, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=, submissionAuthorUrl=http://www.txxb.com.cn/Journalx_txxb/authorLogOn.action, submissionEditorUrl=http://www.txxb.com.cn/Journalx_txxb/editorLogOn.action, submissionReviewUrl=http://www.txxb.com.cn/Journalx_txxb/expertLogOn.action, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""}), JournalExt(id=1263189024764772709, language=EN, name=Journal of Graphics, nameHistory1=null, nameHistory2=null, managedBy=, sponsoredBy=, publishedBy=, editorOffice=, officeProv=null, officeCity=null, officeAddr=, officeZip=, editDirector=, officeDirector=null, officePhone=null, coverPicUrl=null, journalRemark=, submitArticleUrl=null, websiteUrl=, createdTime=1779097332421, updatedTime=1779097332421, createdBy=13701087609, updatedBy=13701087609, submissionGuidelinesUrl=, submissionAuthorUrl=http://www.txxb.com.cn/Journalx_txxb/authorLogOn.action, submissionEditorUrl=http://www.txxb.com.cn/Journalx_txxb/editorLogOn.action, submissionReviewUrl=http://www.txxb.com.cn/Journalx_txxb/expertLogOn.action, submissionCeEditorUrl=, submissionAeEditorUrl=, option={"copyright":""})], databaseList=null, tenantJournalId=1263187241531621409, websiteList=[Website(id=1263188159811207473, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1263187241531621409, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/txxb/CN, language=CN, createTime=1779097126200, createBy=18614031015, updateTime=1779097561850, updateBy=18614031015, name=图学学报-中文, tplId=1146099689490845704, title=图学学报, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1263194793929204110, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=articleTextType, value=kx, createTime=1779098707897, updateTime=1779098707897, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793853706635, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=banner, value=null, createTime=1779098707879, updateTime=1779098707879, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793966952849, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=grayFlag, value=0, createTime=1779098707906, updateTime=1779098707906, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793824346506, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=logo, value=https://castjournals.cast.org.cn/joweb/txxb/CN/file/pic?fileId=lmN7m2FoR6FhgnIimGeZBg==, createTime=1779098707872, updateTime=1779098707872, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793992118675, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=minRunFlag, value=0, createTime=1779098707912, updateTime=1779098707912, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793891455373, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/txxb/CN/file/pic, createTime=1779098707888, updateTime=1779098707888, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793979535762, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=silenceFlag, value=0, createTime=1779098707909, updateTime=1779098707909, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793883066764, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_cn_619/, createTime=1779098707886, updateTime=1779098707886, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793933398415, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=themeColor, value=null, createTime=1779098707898, updateTime=1779098707898, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194793941787024, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188159811207473, code=themeStyle, value=null, createTime=1779098707900, updateTime=1779098707900, creator=18614031015, updator=18614031015)]), Website(id=1263188160184500536, webName=null, webTitle=null, webDomain=null, webCopyrigh=null, webIpcNo=null, seoTitle=null, seoKeywords=null, seoDescription=null, tenantJournalId=null, journalId=1263187241531621409, journalNameCn=null, journalNameEn=null, grayFlag=null, tenantId=1146029695717560320, platformId=null, journalGroupId=null, journalGroupNameCn=null, journalGroupNameEn=null, type=1, domain=https://castjournals.cast.org.cn/joweb/txxb/EN, language=EN, createTime=1779097126289, createBy=18614031015, updateTime=1779097557941, updateBy=18614031015, name=图学学报-英文, tplId=1146101810881728533, title=Journal of Graphics, delFlag=0, indexPage=/home, props=[WebsiteProps(id=1263194821443838360, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=articleTextType, value=kx, createTime=1779098714457, updateTime=1779098714457, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821393506709, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=banner, value=null, createTime=1779098714445, updateTime=1779098714445, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194822144287131, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=grayFlag, value=0, createTime=1779098714624, updateTime=1779098714624, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821334786452, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=logo, value=https://castjournals.cast.org.cn/joweb/txxb/EN/file/pic?fileId=lmN7m2FoR6FhgnIimGeZBg==, createTime=1779098714431, updateTime=1779098714431, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194822316253597, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=minRunFlag, value=0, createTime=1779098714665, updateTime=1779098714665, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821414478231, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=picServerUrl, value=https://castjournals.cast.org.cn/joweb/txxb/EN/file/pic, createTime=1779098714450, updateTime=1779098714450, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194822249144732, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=silenceFlag, value=0, createTime=1779098714649, updateTime=1779098714649, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821401895318, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=staticResourcePath, value=https://castjournals.cast.org.cn/joweb/cast_kjdb_en_623/, createTime=1779098714448, updateTime=1779098714448, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821716468121, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=themeColor, value=null, createTime=1779098714522, updateTime=1779098714522, creator=18614031015, updator=18614031015), WebsiteProps(id=1263194821850685850, tenantId=1146029695717560320, journalId=null, journalGroupId=null, siteId=1263188160184500536, code=themeStyle, value=null, createTime=1779098714554, updateTime=1779098714554, creator=18614031015, updator=18614031015)])], journalTitle=图学学报, weixinUrl=null, journalUrl=http://www.txxb.com.cn/, iacademicId=null, status=1, seqNo=null, journalTitleEn=Journal of Graphics, journalPhotoCn=o2S94GfCLjDYYJqsdQm5Mw==, journalPhotoEn=f7IBTa1ZXKPoIhMUwL1eVw==, journalFirstLetter=J, journalRecommend=null, journalNew=null, journalCollection=null, jcrJf=null, cjcrJf=null, jcrJfStr=null, cjcrJfStr=null, submissionFirstDecision=null, sciSubjectClassification=null, casSubjectClassification=null, citeScore=null, totalCitationFrequency=null, icpCode=null, psCode=null, advertisingLicenseCode=null, copyrightInformation=null, country=null, option=, provinceCode=null, provinceName=null, collectFlag=false), detailUrlCn=https://castjournals.cast.org.cn/joweb/txxb/CN/10.11996/JG.j.2095-302X.2026010039, detailUrlEn=https://castjournals.cast.org.cn/joweb/txxb/EN/10.11996/JG.j.2095-302X.2026010039, pdfUrlCn=https://castjournals.cast.org.cn/joweb/txxb/CN/PDF/10.11996/JG.j.2095-302X.2026010039, pdfUrlEn=https://castjournals.cast.org.cn/joweb/txxb/EN/PDF/10.11996/JG.j.2095-302X.2026010039, aliStartDate=null, aliEndDate=null, collectionFlag=false, citedCount=null, citedUrl=null, reference=null)