Skip to content

Commit 462c017

Browse files
authored
darwin/arm64: fix SIMD detection and improve ARM feature probing (#160)
* darwin/arm64: fix SIMD detection and improve ARM feature probing Detection of Advanced SIMD (NEON) on M1 Macs was broken due to querying the wrong sysctl identifier. Apple's documentation lists "hw.optional.AdvSIMD" as the correct identifier, but in reality, it's "hw.optional.arm.AdvSIMD" as confirmed by `sysctl -a`. This patch corrects the identifier and adds support for its alias, "hw.optional.neon", ensuring proper detection across all models. Additionally, this patch revisits all ARM features detected by the package, cross-referencing identifiers (and aliases) against both Apple's official sysctl documentation: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics and the ARM architecture reference manual: https://developer.arm.com/documentation/ddi0487/latest Each ARM feature now maps to all known sysctl aliases, preventing false negatives and improving detection accuracy on Apple Silicon. * nit * fix typo
1 parent 668c84e commit 462c017

2 files changed

Lines changed: 42 additions & 40 deletions

File tree

cpuid.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ const (
286286
CRC32 // CRC32/CRC32C instructions
287287
DCPOP // Data cache clean to Point of Persistence (DC CVAP)
288288
EVTSTRM // Generic timer
289-
FCMA // Floatin point complex number addition and multiplication
289+
FCMA // Floating point complex number addition and multiplication
290290
FHM // FMLAL and FMLSL instructions
291291
FP // Single-precision and double-precision floating point
292292
FPHP // Half-precision floating point

os_darwin_arm64.go

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,16 @@ func sysctlGetInt64(unknown int, names ...string) int {
6565
return unknown
6666
}
6767

68-
func setFeature(c *CPUInfo, name string, feature FeatureID) {
69-
c.featureSet.setIf(sysctlGetBool(name), feature)
68+
func setFeature(c *CPUInfo, feature FeatureID, aliases ...string) {
69+
for _, alias := range aliases {
70+
set := sysctlGetBool(alias)
71+
c.featureSet.setIf(set, feature)
72+
if set {
73+
break
74+
}
75+
}
7076
}
77+
7178
func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
7279
c.BrandName = sysctlGetString("machdep.cpu.brand_string")
7380

@@ -87,41 +94,36 @@ func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
8794
c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
8895
c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
8996

90-
// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
91-
setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
92-
setFeature(c, "hw.optional.AdvSIMD", ASIMD)
93-
setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
94-
setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
95-
setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
96-
setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
97-
// setFeature(c, "", EVTSTRM)
98-
setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
99-
setFeature(c, "hw.optional.arm.FEAT_FHM", FHM)
100-
setFeature(c, "hw.optional.arm.FEAT_FP", FP)
101-
setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
102-
setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
103-
setFeature(c, "hw.optional.arm.FEAT_RNG", RNDR)
104-
setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
105-
setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
106-
setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
107-
setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
108-
setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
109-
setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
110-
setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
111-
setFeature(c, "hw.optional.arm.FEAT_TLBIOS", TLB)
112-
setFeature(c, "hw.optional.arm.FEAT_TLBIRANGE", TLB)
113-
setFeature(c, "hw.optional.arm.FEAT_FlagM", TS)
114-
setFeature(c, "hw.optional.arm.FEAT_FlagM2", TS)
115-
// setFeature(c, "", SM3)
116-
// setFeature(c, "", SM4)
117-
setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
118-
119-
// from empirical observation
120-
setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
121-
setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
122-
setFeature(c, "hw.optional.floatingpoint", FP)
123-
setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
124-
setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
125-
setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
126-
setFeature(c, "hw.optional.armv8_crc32", CRC32)
97+
// ARM features:
98+
//
99+
// Note: On some Apple Silicon systems, some features have aliases. See:
100+
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
101+
// When a feature has aliases, we look at all of them and consider the feature available when at least one identifier matches.
102+
setFeature(c, AESARM, "hw.optional.arm.FEAT_AES") // AES instructions
103+
setFeature(c, ASIMD, "hw.optional.arm.AdvSIMD", "hw.optional.neon") // Advanced SIMD
104+
setFeature(c, ASIMDDP, "hw.optional.arm.FEAT_DotProd") // SIMD Dot Product
105+
setFeature(c, ASIMDHP, "hw.optional.arm.AdvSIMD_HPFPCvt", "hw.optional.neon_hpfp") // Advanced SIMD half-precision floating point
106+
setFeature(c, ASIMDRDM, "hw.optional.arm.FEAT_RDM") // Rounding Double Multiply Accumulate/Subtract
107+
setFeature(c, ATOMICS, "hw.optional.arm.FEAT_LSE", "hw.optional.armv8_1_atomics") // Large System Extensions (LSE)
108+
setFeature(c, CRC32, "hw.optional.arm.FEAT_CRC32", "hw.optional.armv8_crc32") // CRC32/CRC32C instructions
109+
setFeature(c, DCPOP, "hw.optional.arm.FEAT_DPB") // Data cache clean to Point of Persistence (DC CVAP)
110+
setFeature(c, EVTSTRM, "hw.optional.arm.FEAT_ECV") // Generic timer
111+
setFeature(c, FCMA, "hw.optional.arm.FEAT_FCMA", "hw.optional.armv8_3_compnum") // Floating point complex number addition and multiplication
112+
setFeature(c, FHM, "hw.optional.armv8_2_fhm", "hw.optional.arm.FEAT_FHM") // FMLAL and FMLSL instructions
113+
setFeature(c, FP, "hw.optional.floatingpoint") // Single-precision and double-precision floating point
114+
setFeature(c, FPHP, "hw.optional.arm.FEAT_FP16", "hw.optional.neon_fp16") // Half-precision floating point
115+
setFeature(c, GPA, "hw.optional.arm.FEAT_PAuth") // Generic Pointer Authentication
116+
setFeature(c, JSCVT, "hw.optional.arm.FEAT_JSCVT") // Javascript-style double->int convert (FJCVTZS)
117+
setFeature(c, LRCPC, "hw.optional.arm.FEAT_LRCPC") // Weaker release consistency (LDAPR, etc)
118+
setFeature(c, PMULL, "hw.optional.arm.FEAT_PMULL") // Polynomial Multiply instructions (PMULL/PMULL2)
119+
setFeature(c, RNDR, "hw.optional.arm.FEAT_RNG") // Random Number instructions
120+
setFeature(c, TLB, "hw.optional.arm.FEAT_TLBIOS", "hw.optional.arm.FEAT_TLBIRANGE") // Outer Shareable and TLB range maintenance instructions
121+
setFeature(c, TS, "hw.optional.arm.FEAT_FlagM", "hw.optional.arm.FEAT_FlagM2") // Flag manipulation instructions
122+
setFeature(c, SHA1, "hw.optional.arm.FEAT_SHA1") // SHA-1 instructions (SHA1C, etc)
123+
setFeature(c, SHA2, "hw.optional.arm.FEAT_SHA256") // SHA-2 instructions (SHA256H, etc)
124+
setFeature(c, SHA3, "hw.optional.arm.FEAT_SHA3") // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
125+
setFeature(c, SHA512, "hw.optional.arm.FEAT_SHA512") // SHA512 instructions
126+
setFeature(c, SM3, "hw.optional.arm.FEAT_SM3") // SM3 instructions
127+
setFeature(c, SM4, "hw.optional.arm.FEAT_SM4") // SM4 instructions
128+
setFeature(c, SVE, "hw.optional.arm.FEAT_SVE") // Scalable Vector Extension
127129
}

0 commit comments

Comments
 (0)