diff --git a/Examples/Package.swift b/Examples/Package.swift index ce5b8381..02d2cfde 100644 --- a/Examples/Package.swift +++ b/Examples/Package.swift @@ -9,6 +9,12 @@ let package = Package( .package(path: "../") ], targets: [ + .executableTarget( + name: "embedded-wat", + dependencies: [ + .product(name: "WAT", package: "WasmKit") + ] + ), .executableTarget(name: "Factorial", dependencies: [ .product(name: "WasmKit", package: "WasmKit"), .product(name: "WAT", package: "WasmKit") diff --git a/Examples/Sources/embedded-wat/Entrypoint.swift b/Examples/Sources/embedded-wat/Entrypoint.swift new file mode 100644 index 00000000..67db4def --- /dev/null +++ b/Examples/Sources/embedded-wat/Entrypoint.swift @@ -0,0 +1,24 @@ +import WAT + +@main +struct Entrypoint { + static func main() { + var watString = "" + while let line = readLine() { + if !watString.isEmpty { + watString += "\n" + } + watString += line + } + guard !watString.isEmpty else { return } + + do throws(WatParserError) { + let bytes = try wat2wasm(watString) + for byte in bytes { + print("0x" + String(byte, radix: 16)) + } + } catch { + print(error) + } + } +} diff --git a/Package.swift b/Package.swift index 7fa1d93e..6dfaa982 100644 --- a/Package.swift +++ b/Package.swift @@ -77,7 +77,7 @@ let package = Package( .target( name: "WAT", dependencies: [ - "WasmParser", + "WasmParserCore", .target( name: "ComponentModel", condition: .when(traits: ["ComponentModel"]) @@ -97,10 +97,9 @@ let package = Package( ), .target( - name: "WasmParser", + name: "WasmParserCore", dependencies: [ "WasmTypes", - .product(name: "SystemPackage", package: "swift-system"), .target( name: "ComponentModel", condition: .when(traits: ["ComponentModel"]) @@ -108,6 +107,14 @@ let package = Package( ], exclude: ["CMakeLists.txt"] ), + .target( + name: "WasmParser", + dependencies: [ + "WasmParserCore", + .product(name: "SystemPackage", package: "swift-system"), + ], + exclude: ["CMakeLists.txt"] + ), .testTarget( name: "WasmParserTests", dependencies: [ diff --git a/Sources/CMakeLists.txt b/Sources/CMakeLists.txt index 5e28ac12..6808c17a 100644 --- a/Sources/CMakeLists.txt +++ b/Sources/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(SystemExtras) add_subdirectory(WASI) add_subdirectory(WasmTypes) add_subdirectory(WasmParser) +add_subdirectory(WasmParserCore) add_subdirectory(WAT) if(WASMKIT_BUILD_CLI) diff --git a/Sources/WAT/BinaryEncoding/BinaryInstructionEncoder.swift b/Sources/WAT/BinaryEncoding/BinaryInstructionEncoder.swift index 8c5b7e61..e8d54841 100644 --- a/Sources/WAT/BinaryEncoding/BinaryInstructionEncoder.swift +++ b/Sources/WAT/BinaryEncoding/BinaryInstructionEncoder.swift @@ -2,7 +2,7 @@ //// Automatically generated by Utilities/Sources/WasmGen.swift //// DO NOT EDIT DIRECTLY -import WasmParser +import WasmParserCore import WasmTypes /// An instruction encoder that is responsible for encoding opcodes and immediates diff --git a/Sources/WAT/BinaryEncoding/ComponentEncoder.swift b/Sources/WAT/BinaryEncoding/ComponentEncoder.swift index b510a726..7d8f0dea 100644 --- a/Sources/WAT/BinaryEncoding/ComponentEncoder.swift +++ b/Sources/WAT/BinaryEncoding/ComponentEncoder.swift @@ -1,6 +1,6 @@ #if ComponentModel import ComponentModel - import WasmParser + import WasmParserCore import WasmTypes /// Binary component encoder, implementing CM proposal `Binary.md` spec: https://github.com/WebAssembly/component-model/blob/main/design/mvp/Binary.md diff --git a/Sources/WAT/BinaryEncoding/Encoder.swift b/Sources/WAT/BinaryEncoding/Encoder.swift index 47a037cb..89250887 100644 --- a/Sources/WAT/BinaryEncoding/Encoder.swift +++ b/Sources/WAT/BinaryEncoding/Encoder.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes package struct Encoder { @@ -511,7 +511,7 @@ struct ExpressionEncoder: BinaryInstructionEncoder { mutating func encodeInstruction(_ opcode: [UInt8]) { encoder.output.append(contentsOf: opcode) } - mutating func encodeImmediates(blockType: WasmParser.BlockType) { + mutating func encodeImmediates(blockType: WasmParserCore.BlockType) { switch blockType { case .empty: encoder.output.append(0x40) case .type(let valueType): encoder.encode(valueType) @@ -528,12 +528,12 @@ struct ExpressionEncoder: BinaryInstructionEncoder { mutating func encodeImmediates(globalIndex: UInt32) { encodeUnsigned(globalIndex) } mutating func encodeImmediates(localIndex: UInt32) { encodeUnsigned(localIndex) } mutating func encodeImmediates(typeIndex: UInt32) { encodeUnsigned(typeIndex) } - mutating func encodeImmediates(memarg: WasmParser.MemArg) { + mutating func encodeImmediates(memarg: WasmParserCore.MemArg) { encodeUnsigned(UInt(memarg.align)) encodeUnsigned(memarg.offset) } mutating func encodeImmediates(lane: UInt8) { encoder.output.append(lane) } - mutating func encodeImmediates(memarg: WasmParser.MemArg, lane: UInt8) { + mutating func encodeImmediates(memarg: WasmParserCore.MemArg, lane: UInt8) { encodeImmediates(memarg: memarg) encodeImmediates(lane: lane) } @@ -543,7 +543,7 @@ struct ExpressionEncoder: BinaryInstructionEncoder { mutating func encodeImmediates(memory: UInt32) { encodeUnsigned(memory) } mutating func encodeImmediates(relativeDepth: UInt32) { encodeUnsigned(relativeDepth) } mutating func encodeImmediates(table: UInt32) { encodeUnsigned(table) } - mutating func encodeImmediates(targets: WasmParser.BrTable) { + mutating func encodeImmediates(targets: WasmParserCore.BrTable) { encoder.encodeVector(targets.labelIndices) { value, encoder in encoder.writeUnsignedLEB128(value) } @@ -554,8 +554,8 @@ struct ExpressionEncoder: BinaryInstructionEncoder { mutating func encodeImmediates(value: Int32) { encodeSigned(value) } mutating func encodeImmediates(value: Int64) { encodeSigned(value) } mutating func encodeImmediates(value: WasmTypes.V128) { encoder.output.append(contentsOf: value.bytes) } - mutating func encodeImmediates(value: WasmParser.IEEE754.Float32) { encodeFixedWidth(value.bitPattern) } - mutating func encodeImmediates(value: WasmParser.IEEE754.Float64) { encodeFixedWidth(value.bitPattern) } + mutating func encodeImmediates(value: WasmParserCore.IEEE754.Float32) { encodeFixedWidth(value.bitPattern) } + mutating func encodeImmediates(value: WasmParserCore.IEEE754.Float64) { encodeFixedWidth(value.bitPattern) } mutating func encodeImmediates(dstMem: UInt32, srcMem: UInt32) { encodeUnsigned(dstMem) encodeUnsigned(srcMem) @@ -625,7 +625,7 @@ func encode(module: inout Wat, options: EncodeOptions) throws(WatParserError) -> // Section 1: Type section if !module.types.isEmpty { encoder.section(id: 0x01) { encoder in - encoder.encodeVector(module.types, transform: \.type.signature) + encoder.encodeVector(module.types, transform: { $0.type.signature }) } } diff --git a/Sources/WAT/CMakeLists.txt b/Sources/WAT/CMakeLists.txt index 07d9072f..0b5325c2 100644 --- a/Sources/WAT/CMakeLists.txt +++ b/Sources/WAT/CMakeLists.txt @@ -14,4 +14,4 @@ add_wasmkit_library(WAT ) target_link_wasmkit_libraries(WAT PUBLIC - WasmParser) + WasmParserCore) diff --git a/Sources/WAT/Lexer.swift b/Sources/WAT/Lexer.swift index 94ede4fb..71c56805 100644 --- a/Sources/WAT/Lexer.swift +++ b/Sources/WAT/Lexer.swift @@ -1,4 +1,5 @@ -import WasmParser +import WasmParserCore +import WasmTypes enum TokenKind: Equatable { case leftParen @@ -471,7 +472,7 @@ func parseHexDigit(_ char: Unicode.Scalar) throws(WatParserError) -> UInt8? { extension Lexer.Cursor { mutating func parseHexNumber() throws(WatParserError) -> String { - return try parseUnderscoredChars(continueParsing: \.properties.isASCIIHexDigit) + return try parseUnderscoredChars(continueParsing: { $0.properties.isASCIIHexDigit }) } mutating func parseDecimalNumber() throws(WatParserError) -> String { diff --git a/Sources/WAT/NameMapping.swift b/Sources/WAT/NameMapping.swift index 2a35eea2..b7bcc06a 100644 --- a/Sources/WAT/NameMapping.swift +++ b/Sources/WAT/NameMapping.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes /// A name with its location in the source file diff --git a/Sources/WAT/ParseTextInstruction.swift b/Sources/WAT/ParseTextInstruction.swift index 0f463bf4..58f8daca 100644 --- a/Sources/WAT/ParseTextInstruction.swift +++ b/Sources/WAT/ParseTextInstruction.swift @@ -2,7 +2,7 @@ //// Automatically generated by Utilities/Sources/WasmGen.swift //// DO NOT EDIT DIRECTLY -import WasmParser +import WasmParserCore import WasmTypes /// Parses a text instruction, consuming immediate tokens as necessary. diff --git a/Sources/WAT/Parser.swift b/Sources/WAT/Parser.swift index 240d52f4..23718a2e 100644 --- a/Sources/WAT/Parser.swift +++ b/Sources/WAT/Parser.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes internal struct Parser { @@ -266,7 +266,7 @@ internal struct Parser { mutating func expectFloat32() throws(WatParserError) -> IEEE754.Float32 { let bitPattern = try expectFloatingPoint( - Float32.self, toBitPattern: \.bitPattern, + Float32.self, toBitPattern: { $0.bitPattern }, isNaN: { Float32(bitPattern: $0).isNaN }, buildBitPattern: { UInt32( @@ -280,7 +280,7 @@ internal struct Parser { mutating func expectFloat64() throws(WatParserError) -> IEEE754.Float64 { let bitPattern = try expectFloatingPoint( - Float64.self, toBitPattern: \.bitPattern, + Float64.self, toBitPattern: { $0.bitPattern }, isNaN: { Float64(bitPattern: $0).isNaN }, buildBitPattern: { UInt64( diff --git a/Sources/WAT/Parser/ComponentDef.swift b/Sources/WAT/Parser/ComponentDef.swift index 23149f1d..c3fcc9e6 100644 --- a/Sources/WAT/Parser/ComponentDef.swift +++ b/Sources/WAT/Parser/ComponentDef.swift @@ -1,7 +1,7 @@ #if ComponentModel import ComponentModel - import WasmParser + import WasmParserCore import WasmTypes extension ComponentWatParser { diff --git a/Sources/WAT/Parser/ComponentWastParser.swift b/Sources/WAT/Parser/ComponentWastParser.swift index 3ce86f35..a99270ab 100644 --- a/Sources/WAT/Parser/ComponentWastParser.swift +++ b/Sources/WAT/Parser/ComponentWastParser.swift @@ -1,7 +1,7 @@ #if ComponentModel import ComponentModel - import WasmParser + import WasmParserCore import WasmTypes // MARK: - Component WAST Directive Types diff --git a/Sources/WAT/Parser/ComponentWatParser.swift b/Sources/WAT/Parser/ComponentWatParser.swift index a7c9c2f7..48c7c1a6 100644 --- a/Sources/WAT/Parser/ComponentWatParser.swift +++ b/Sources/WAT/Parser/ComponentWatParser.swift @@ -1,7 +1,7 @@ #if ComponentModel import ComponentModel - import WasmParser + import WasmParserCore import WasmTypes public struct ComponentWatParser: ~Copyable { @@ -832,14 +832,10 @@ parser = tempParser.parser // Core types must be simple value types (i32, i64, f32, f64), not type references. - // Resolve immediately using a dummy resolver since these types don't reference other definitions. - struct DummyResolver: NameToIndexResolver { - func resolveIndex(use: Parser.IndexOrId) throws(WatParserError) -> Int { - throw WatParserError("Core value types cannot reference other types", location: use.location) - } + // Resolve immediately using a throwing resolver since these types don't reference other definitions. + return try unresolvedType.resolve { (use: Parser.IndexOrId) throws(WatParserError) in + throw WatParserError("Core value types cannot reference other types", location: use.location) } - - return try unresolvedType.resolve(DummyResolver()) } /// Resolve an outer alias reference to a component's type diff --git a/Sources/WAT/Parser/ExpressionParser.swift b/Sources/WAT/Parser/ExpressionParser.swift index 85ada776..58e4c074 100644 --- a/Sources/WAT/Parser/ExpressionParser.swift +++ b/Sources/WAT/Parser/ExpressionParser.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes struct ExpressionParser where Visitor.VisitorError == WatParserError { diff --git a/Sources/WAT/Parser/NormalizedDefinition.swift b/Sources/WAT/Parser/NormalizedDefinition.swift index d12a4c07..4df19654 100644 --- a/Sources/WAT/Parser/NormalizedDefinition.swift +++ b/Sources/WAT/Parser/NormalizedDefinition.swift @@ -1,7 +1,7 @@ #if ComponentModel import ComponentModel - import WasmParser + import WasmParserCore import WasmTypes extension ComponentWatParser { diff --git a/Sources/WAT/Parser/WastParser.swift b/Sources/WAT/Parser/WastParser.swift index 9640fda2..0c8e5911 100644 --- a/Sources/WAT/Parser/WastParser.swift +++ b/Sources/WAT/Parser/WastParser.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes protocol WastConstInstructionVisitor: InstructionVisitor { diff --git a/Sources/WAT/Parser/WatParser.swift b/Sources/WAT/Parser/WatParser.swift index dea6dd4f..f4acbaef 100644 --- a/Sources/WAT/Parser/WatParser.swift +++ b/Sources/WAT/Parser/WatParser.swift @@ -1,4 +1,4 @@ -import WasmParser +import WasmParserCore import WasmTypes struct WatParser { @@ -36,32 +36,40 @@ struct WatParser { } struct UnresolvedType { - private let make: (any NameToIndexResolver) -> Result + typealias IndexResolver = (Parser.IndexOrId) throws(WatParserError) -> Int - init(make: @escaping (any NameToIndexResolver) -> Result) { + private let make: (IndexResolver) throws(WatParserError) -> T + + init(make: @escaping (IndexResolver) throws(WatParserError) -> T) { self.make = make } init(_ value: T) { - self.make = { _ in .success(value) } + self.make = { _ in value } } func project(_ keyPath: KeyPath) -> UnresolvedType { - return UnresolvedType { - let parent = make($0) - return parent.map { $0[keyPath: keyPath] } + return UnresolvedType { (resolveIndex: IndexResolver) throws(WatParserError) in + try self.make(resolveIndex)[keyPath: keyPath] } } func map(_ transform: @escaping (T) -> U) -> UnresolvedType { - return UnresolvedType(make: { resolver in Result { () throws(WatParserError) in transform(try resolve(resolver)) } }) + return UnresolvedType { (resolveIndex: IndexResolver) throws(WatParserError) in + try transform(self.make(resolveIndex)) + } } func resolve(_ typeMap: TypesMap) throws(WatParserError) -> T { - return try resolve(typeMap.nameMapping) + return try make(typeMap.nameMapping.resolveIndex) } - func resolve(_ resolver: any NameToIndexResolver) throws(WatParserError) -> T { - return try make(resolver).get() + + func resolve(_ resolver: some NameToIndexResolver) throws(WatParserError) -> T { + return try make(resolver.resolveIndex) + } + + func resolve(_ resolveIndex: IndexResolver) throws(WatParserError) -> T { + return try make(resolveIndex) } } @@ -324,7 +332,7 @@ struct WatParser { if try parser.peek(.leftParen) != nil { let (numberOfItems, indices) = try parseExprList() inlineElement = ElementDecl( - mode: .inline, type: tableType.project(\.elementType), indices: indices + mode: .inline, type: tableType.map({ $0.elementType }), indices: indices ) tableType = tableType.map { var value = $0 @@ -650,13 +658,11 @@ struct WatParser { let (params, names) = try params(mayHaveName: true) let results = try results() try parser.expect(.rightParen) - return UnresolvedType { typeMap in - Result { () throws(WatParserError) in - let params = try params.map { param throws(WatParserError) in try param.resolve(typeMap) } - let results = try results.map { result throws(WatParserError) in try result.resolve(typeMap) } - let signature = WasmTypes.FunctionType(parameters: params, results: results) - return FunctionType(signature: signature, parameterNames: names) - } + return UnresolvedType { (resolveIndex: UnresolvedType.IndexResolver) throws(WatParserError) in + let params = try params.map { param throws(WatParserError) in try param.resolve(resolveIndex) } + let results = try results.map { result throws(WatParserError) in try result.resolve(resolveIndex) } + let signature = WasmTypes.FunctionType(parameters: params, results: results) + return FunctionType(signature: signature, parameterNames: names) } } @@ -666,13 +672,11 @@ struct WatParser { if results.isEmpty, params.isEmpty { return nil } - return UnresolvedType { typeMap in - Result { () throws(WatParserError) in - let params = try params.map { resolver throws(WatParserError) in try resolver.resolve(typeMap) } - let results = try results.map { resolver throws(WatParserError) in try resolver.resolve(typeMap) } - let signature = WasmTypes.FunctionType(parameters: params, results: results) - return FunctionType(signature: signature, parameterNames: names) - } + return UnresolvedType { (resolveIndex: UnresolvedType.IndexResolver) throws(WatParserError) in + let params = try params.map { param throws(WatParserError) in try param.resolve(resolveIndex) } + let results = try results.map { result throws(WatParserError) in try result.resolve(resolveIndex) } + let signature = WasmTypes.FunctionType(parameters: params, results: results) + return FunctionType(signature: signature, parameterNames: names) } } @@ -746,8 +750,8 @@ struct WatParser { } else if try parser.takeKeyword("extern") { return UnresolvedType(.abstract(.externRef)) } else if let id = try parser.takeIndexOrId() { - return UnresolvedType(make: { resolver in - Result { () throws(WatParserError) in try .concrete(typeIndex: UInt32(resolver.resolveIndex(use: id))) } + return UnresolvedType(make: { (resolveIndex: UnresolvedType.IndexResolver) throws(WatParserError) in + try .concrete(typeIndex: UInt32(resolveIndex(id))) }) } throw WatParserError("expected heap type", location: parser.lexer.location()) diff --git a/Sources/WAT/WAT.swift b/Sources/WAT/WAT.swift index 7e8a54d3..94e151cd 100644 --- a/Sources/WAT/WAT.swift +++ b/Sources/WAT/WAT.swift @@ -1,4 +1,5 @@ -import WasmParser +import WasmParserCore +import WasmTypes /// Options for encoding a WebAssembly module into a binary format. public struct EncodeOptions: Sendable { @@ -39,7 +40,7 @@ public func wat2wasm( _ input: String, features: WasmFeatureSet = .default, options: EncodeOptions = .default -) throws -> [UInt8] { +) throws(WatParserError) -> [UInt8] { #if ComponentModel // Look ahead to determine if this is a component or module var peekParser = Parser(input) @@ -111,7 +112,7 @@ public struct Wat { /// This method effectively consumes the module value, encoding it into a /// binary format byte array. If you need to encode the module multiple times, /// you should create a copy of the module value before encoding it. - public consuming func encode(options: EncodeOptions = .default) throws -> [UInt8] { + public consuming func encode(options: EncodeOptions = .default) throws(WatParserError) -> [UInt8] { try WAT.encode(module: &self, options: options) } } @@ -137,7 +138,7 @@ public struct Wat { /// /// let wasm = try wat.encode() /// ``` -public func parseWAT(_ input: String, features: WasmFeatureSet = .default) throws -> Wat { +public func parseWAT(_ input: String, features: WasmFeatureSet = .default) throws(WatParserError) -> Wat { var parser = Parser(input) var wat: Wat if try parser.takeParenBlockStart("module") { @@ -168,7 +169,7 @@ public struct Wast { /// /// - Returns: A tuple containing the parsed directive and its location in the WAST script /// or `nil` if there are no more directives to parse. - public mutating func nextDirective() throws -> (directive: WastDirective, location: Location)? { + public mutating func nextDirective() throws(WatParserError) -> (directive: WastDirective, location: Location)? { let location = try parser.parser.peek()?.location(in: parser.parser.lexer) ?? parser.parser.lexer.location() if let directive = try parser.nextDirective() { return (directive, location) diff --git a/Sources/WAT/WatParserError.swift b/Sources/WAT/WatParserError.swift index 053cddf7..e3658183 100644 --- a/Sources/WAT/WatParserError.swift +++ b/Sources/WAT/WatParserError.swift @@ -1,6 +1,6 @@ /// An error type thrown during WAT (WebAssembly Text Format) parsing. public struct WatParserError: Swift.Error { - package let message: String + public let message: String public let location: Location? package init(_ message: String, location: Location?) { diff --git a/Sources/WasmKit/ModuleParser.swift b/Sources/WasmKit/ModuleParser.swift index 035ae2d6..66ea5b5a 100644 --- a/Sources/WasmKit/ModuleParser.swift +++ b/Sources/WasmKit/ModuleParser.swift @@ -1,5 +1,6 @@ import SystemPackage import WasmParser +import WasmParserCore #if os(Windows) import ucrt diff --git a/Sources/WasmParser/CMakeLists.txt b/Sources/WasmParser/CMakeLists.txt index b3c14390..c3ccd446 100644 --- a/Sources/WasmParser/CMakeLists.txt +++ b/Sources/WasmParser/CMakeLists.txt @@ -1,14 +1,7 @@ add_wasmkit_library(WasmParser - Stream/ByteStream.swift - Stream/FileHandleStream.swift - BinaryInstructionDecoder.swift - InstructionVisitor.swift - LEB.swift - ParsingLimits.swift - WasmParserError.swift + FileHandleStream.swift WasmParser.swift - WasmTypes.swift ) target_link_wasmkit_libraries(WasmParser PUBLIC - WasmTypes SystemPackage) + WasmParserCore) diff --git a/Sources/WasmParser/Stream/FileHandleStream.swift b/Sources/WasmParser/FileHandleStream.swift similarity index 99% rename from Sources/WasmParser/Stream/FileHandleStream.swift rename to Sources/WasmParser/FileHandleStream.swift index da4a87b6..20d59691 100644 --- a/Sources/WasmParser/Stream/FileHandleStream.swift +++ b/Sources/WasmParser/FileHandleStream.swift @@ -1,3 +1,5 @@ +@_exported import WasmParserCore + import struct SystemPackage.FileDescriptor public final class FileHandleStream: ByteStream { diff --git a/Sources/WasmParser/WasmParser.swift b/Sources/WasmParser/WasmParser.swift index ad964e08..49045913 100644 --- a/Sources/WasmParser/WasmParser.swift +++ b/Sources/WasmParser/WasmParser.swift @@ -1,5 +1,3 @@ -import WasmTypes - import struct SystemPackage.FileDescriptor import struct SystemPackage.FilePath @@ -7,53 +5,6 @@ import struct SystemPackage.FilePath import ucrt #endif -/// A streaming parser for WebAssembly binary format. -/// -/// The parser is designed to be used to incrementally parse a WebAssembly binary bytestream. -public struct Parser { - @usableFromInline - let stream: Stream - @usableFromInline let limits: ParsingLimits - @usableFromInline var orderTracking = OrderTracking() - - @usableFromInline - enum NextParseTarget { - case header - case section - } - @usableFromInline - var nextParseTarget: NextParseTarget - - public let features: WasmFeatureSet - public var offset: Int { - return stream.currentIndex - } - - public init(stream: Stream, features: WasmFeatureSet = .default) { - self.stream = stream - self.features = features - self.nextParseTarget = .header - self.limits = .default - } - - @usableFromInline - internal func makeError(_ message: WasmParserError.Message) -> WasmParserError { - return WasmParserError(message: message, offset: offset) - } -} - -extension Parser where Stream == StaticByteStream { - - /// Initialize a new parser with the given bytes - /// - /// - Parameters: - /// - bytes: The bytes of the WebAssembly binary file to parse - /// - features: Enabled WebAssembly features for parsing - public init(bytes: [UInt8], features: WasmFeatureSet = .default) { - self.init(stream: StaticByteStream(bytes: bytes), features: features) - } -} - extension Parser where Stream == FileHandleStream { /// Initialize a new parser with the given file handle @@ -84,1195 +35,6 @@ extension Parser where Stream == FileHandleStream { } } -@_documentation(visibility: internal) -public struct ExpressionParser { - /// The byte offset of the code in the module - let codeOffset: Int - /// The initial byte offset of the code buffer stream - /// NOTE: This might be different from `codeOffset` if the code buffer - /// is not a part of the initial `FileHandleStream` buffer - let initialStreamOffset: Int - @usableFromInline - var parser: Parser - - /// Whether the final `end` opcode has been returned. We track this explicitly - /// rather than checking `hasReachedEnd()` upfront because an exhausted stream - /// without a preceding `end` opcode is a validation error, not a normal exit. - @usableFromInline - var reachedEnd: Bool - - public var offset: Int { - self.codeOffset + self.parser.offset - self.initialStreamOffset - } - - public init(code: Code) { - self.parser = Parser( - stream: StaticByteStream(bytes: code.expression), - features: code.features - ) - self.codeOffset = code.offset - self.initialStreamOffset = self.parser.offset - self.reachedEnd = false - } - - /// Parse the next instruction. Returns nil when expression is complete (end opcode reached at top level). - @inlinable - public mutating func parse() throws(WasmParserError) -> Visit? { - if reachedEnd { return nil } - let instructionOffset = offset - let instruction = try parser.parseInstruction() - if case .end = instruction, try parser.stream.hasReachedEnd() { - reachedEnd = true - } - return Visit(instruction: instruction, offset: instructionOffset) - } - - /// A parsed instruction ready to be dispatched to a visitor. - public struct Visit { - @usableFromInline - let instruction: Instruction - @usableFromInline - let offset: Int - - @usableFromInline - init(instruction: Instruction, offset: Int) { - self.instruction = instruction - self.offset = offset - } - - @inlinable - public func callAsFunction( - visitor: inout V - ) throws(V.VisitorError) { - visitor.binaryOffset = offset - try dispatchInstruction(instruction, to: &visitor) - } - } -} - -let WASM_MAGIC: [UInt8] = [0x00, 0x61, 0x73, 0x6D] - -/// Flags for enabling/disabling WebAssembly features -public struct WasmFeatureSet: OptionSet, Sendable { - /// The raw value of the feature set - public let rawValue: Int - - /// Initialize a new feature set with the given raw value - public init(rawValue: Int) { - self.rawValue = rawValue - } - - /// The WebAssembly memory64 proposal - @_alwaysEmitIntoClient - public static var memory64: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 0) } - /// The WebAssembly reference types proposal - @_alwaysEmitIntoClient - public static var referenceTypes: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 1) } - /// The WebAssembly threads proposal - @_alwaysEmitIntoClient - public static var threads: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 2) } - /// The WebAssembly tail-call proposal - @_alwaysEmitIntoClient - public static var tailCall: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 3) } - /// The WebAssembly SIMD proposal - @_alwaysEmitIntoClient - public static var simd: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 4) } - - /// The default feature set - public static let `default`: WasmFeatureSet = [.referenceTypes] - /// The feature set with all features enabled - public static let all: WasmFeatureSet = [.memory64, .referenceTypes, .threads, .tailCall, .simd] -} - -/// > Note: -/// -extension ByteStream { - @inlinable - func parseVector(content parser: () throws(WasmParserError) -> Content) throws(WasmParserError) -> [Content] { - var contents = [Content]() - let count: UInt32 = try parseUnsigned() - for _ in 0.. Note: -/// -extension ByteStream { - @inlinable - func parseUnsigned(_: T.Type = T.self) throws(WasmParserError) -> T { - try decodeLEB128(stream: self) - } - - @inlinable - func parseSigned() throws(WasmParserError) -> T { - try decodeLEB128(stream: self) - } - - @usableFromInline - func parseVarSigned33() throws(WasmParserError) -> Int64 { - try decodeLEB128(stream: self, bitWidth: 33) - } -} - -/// > Note: -/// -extension ByteStream { - package func parseName() throws(WasmParserError) -> String { - let bytes = try parseVector { () throws(WasmParserError) -> UInt8 in - try consumeAny() - } - - // TODO(optimize): Utilize ASCII fast path in UTF8 decoder - var name = "" - - var iterator = bytes.makeIterator() - var decoder = UTF8() - Decode: while true { - switch decoder.decode(&iterator) { - case .scalarValue(let scalar): name.append(Character(scalar)) - case .emptyInput: break Decode - case .error: throw WasmParserError(message: .invalidUTF8(bytes), offset: currentIndex) - } - } - - return name - } -} - -extension Parser { - @inlinable - func parseVector(content parser: () throws(WasmParserError) -> Content) throws(WasmParserError) -> [Content] { - try stream.parseVector(content: parser) - } - - @inline(__always) - @inlinable - func parseUnsigned(_: T.Type = T.self) throws(WasmParserError) -> T { - try stream.parseUnsigned(T.self) - } - - @inlinable - func parseInteger() throws(WasmParserError) -> T { - let signed: T.Signed = try stream.parseSigned() - return T(bitPattern: signed) - } - - func parseName() throws(WasmParserError) -> String { - try stream.parseName() - } -} - -/// > Note: -/// -extension Parser { - @usableFromInline - func parseFloat() throws(WasmParserError) -> UInt32 { - let consumedLittleEndian = try stream.consume(count: 4).reversed() - let bitPattern = consumedLittleEndian.reduce(UInt32(0)) { acc, byte in - acc << 8 + UInt32(byte) - } - return bitPattern - } - - @usableFromInline - func parseDouble() throws(WasmParserError) -> UInt64 { - let consumedLittleEndian = try stream.consume(count: 8).reversed() - let bitPattern = consumedLittleEndian.reduce(UInt64(0)) { acc, byte in - acc << 8 + UInt64(byte) - } - return bitPattern - } -} - -/// > Note: -/// -extension Parser { - /// > Note: - /// - @usableFromInline - func parseValueType() throws(WasmParserError) -> ValueType { - let b = try stream.consumeAny() - - switch b { - case 0x7F: return .i32 - case 0x7E: return .i64 - case 0x7D: return .f32 - case 0x7C: return .f64 - case 0x7B: return .v128 - default: - guard let refType = try parseReferenceType(byte: b) else { - throw makeError(.malformedValueType(b)) - } - return .ref(refType) - } - } - - /// - Returns: `nil` if the given `byte` discriminator is malformed - /// > Note: - /// - @usableFromInline - func parseReferenceType(byte: UInt8) throws(WasmParserError) -> ReferenceType? { - switch byte { - case 0x63: return try ReferenceType(isNullable: true, heapType: parseHeapType()) - case 0x64: return try ReferenceType(isNullable: false, heapType: parseHeapType()) - case 0x6F: return .externRef - case 0x70: return .funcRef - default: return nil // invalid discriminator - } - } - - /// > Note: - /// - @usableFromInline - func parseHeapType() throws(WasmParserError) -> HeapType { - let b = try stream.peek() - switch b { - case 0x6F: - _ = try stream.consumeAny() - return .externRef - case 0x70: - _ = try stream.consumeAny() - return .funcRef - default: - let rawIndex = try stream.parseVarSigned33() - guard let index = TypeIndex(exactly: rawIndex) else { - throw makeError(.invalidFunctionType(rawIndex)) - } - return .concrete(typeIndex: index) - } - } - - /// > Note: - /// - @inlinable - func parseResultType() throws(WasmParserError) -> BlockType { - guard let nextByte = try stream.peek() else { - throw makeError(.unexpectedEnd) - } - switch nextByte { - case 0x40: - _ = try stream.consumeAny() - return .empty - case 0x7B...0x7F, 0x70, 0x6F: - return try .type(parseValueType()) - default: - let rawIndex = try stream.parseVarSigned33() - guard let index = TypeIndex(exactly: rawIndex) else { - throw makeError(.invalidFunctionType(rawIndex)) - } - return .funcType(index) - } - } - - /// > Note: - /// - @inlinable - func parseFunctionType() throws(WasmParserError) -> FunctionType { - let opcode = try stream.consumeAny() - - // XXX: spectest expects the first byte should be parsed as a LEB128 with 1 byte limit - // but the spec itself doesn't require it, so just check the continue bit of LEB128 here. - guard opcode & 0b10000000 == 0 else { - throw makeError(.integerRepresentationTooLong) - } - guard opcode == 0x60 else { - throw makeError(.malformedFunctionType(opcode)) - } - - let parameters = try parseVector { () throws(WasmParserError) in try parseValueType() } - let results = try parseVector { () throws(WasmParserError) in try parseValueType() } - return FunctionType(parameters: parameters, results: results) - } - - /// > Note: - /// - @usableFromInline - func parseLimits() throws(WasmParserError) -> Limits { - let b = try stream.consumeAny() - let sharedMask: UInt8 = 0b0010 - let isMemory64Mask: UInt8 = 0b0100 - - let hasMax = b & 0b0001 != 0 - let shared = b & sharedMask != 0 - let isMemory64 = b & isMemory64Mask != 0 - - var flagMask: UInt8 = 0b0001 - if features.contains(.threads) { - flagMask |= sharedMask - } - if features.contains(.memory64) { - flagMask |= isMemory64Mask - } - guard (b & ~flagMask) == 0 else { - throw makeError(.malformedLimit(b)) - } - - let min: UInt64 - if isMemory64 { - min = try parseUnsigned(UInt64.self) - } else { - min = try UInt64(parseUnsigned(UInt32.self)) - } - var max: UInt64? - if hasMax { - if isMemory64 { - max = try parseUnsigned(UInt64.self) - } else { - max = try UInt64(parseUnsigned(UInt32.self)) - } - } - return Limits(min: min, max: max, isMemory64: isMemory64, shared: shared) - } - - /// > Note: - /// - func parseMemoryType() throws(WasmParserError) -> MemoryType { - return try parseLimits() - } - - /// > Note: - /// - @inlinable - func parseTableType() throws(WasmParserError) -> TableType { - let elementType: ReferenceType - let b = try stream.consumeAny() - - switch b { - case 0x70: - elementType = .funcRef - case 0x6F: - elementType = .externRef - default: - throw WasmParserError( - kind: .parserUnexpectedByte(b, expected: [0x6F, 0x70]), - offset: stream.currentIndex - ) - } - - let limits = try parseLimits() - return TableType(elementType: elementType, limits: limits) - } - - /// > Note: - /// - @inlinable - func parseGlobalType() throws(WasmParserError) -> GlobalType { - let valueType = try parseValueType() - let mutability = try parseMutability() - return GlobalType(mutability: mutability, valueType: valueType) - } - - @inlinable - func parseMutability() throws(WasmParserError) -> Mutability { - let b = try stream.consumeAny() - switch b { - case 0x00: - return .constant - case 0x01: - return .variable - default: - throw makeError(.malformedMutability(b)) - } - } - - /// > Note: - /// - @inlinable - func parseMemarg() throws(WasmParserError) -> MemArg { - let align: UInt32 = try parseUnsigned() - let offset: UInt64 = try features.contains(.memory64) ? parseUnsigned(UInt64.self) : UInt64(parseUnsigned(UInt32.self)) - return MemArg(offset: offset, align: align) - } - - @inlinable func parseVectorBytes() throws(WasmParserError) -> ArraySlice { - let count: UInt32 = try parseUnsigned() - return try stream.consume(count: Int(count)) - } -} - -/// > Note: -/// -extension Parser: BinaryInstructionDecoder { - @inlinable func parseMemoryIndex() throws(WasmParserError) -> UInt32 { - let zero = try stream.consumeAny() - guard zero == 0x00 else { - throw makeError(.zeroExpected(actual: zero)) - } - return 0 - } - - @inlinable func throwUnknown(_ opcode: [UInt8]) throws(WasmParserError) -> Never { - throw makeError(.illegalOpcode(opcode)) - } - - @inlinable func visitUnknown(_ opcode: [UInt8]) throws(WasmParserError) -> Bool { - try throwUnknown(opcode) - } - - @inlinable mutating func visitBlock() throws(WasmParserError) -> BlockType { try parseResultType() } - @inlinable mutating func visitLoop() throws(WasmParserError) -> BlockType { try parseResultType() } - @inlinable mutating func visitIf() throws(WasmParserError) -> BlockType { try parseResultType() } - @inlinable mutating func visitBr() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitBrIf() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitBrTable() throws(WasmParserError) -> BrTable { - let labelIndices: [UInt32] = try parseVector { () throws(WasmParserError) in try parseUnsigned() } - let labelIndex: UInt32 = try parseUnsigned() - return BrTable(labelIndices: labelIndices, defaultIndex: labelIndex) - } - @inlinable mutating func visitCall() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitCallRef() throws(WasmParserError) -> UInt32 { - // TODO reference types checks - // traps on nil - try parseUnsigned() - } - - @inlinable mutating func visitCallIndirect() throws(WasmParserError) -> (typeIndex: UInt32, tableIndex: UInt32) { - let typeIndex: TypeIndex = try parseUnsigned() - let peek = try stream.peek() - - if !features.contains(.referenceTypes) && peek != 0 { - // Check that reserved byte is zero when reference-types is disabled - throw makeError(.malformedIndirectCall) - } - let tableIndex: TableIndex = try parseUnsigned() - return (typeIndex, tableIndex) - } - - @inlinable mutating func visitReturnCall() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - - @inlinable mutating func visitReturnCallIndirect() throws(WasmParserError) -> (typeIndex: UInt32, tableIndex: UInt32) { - let typeIndex: TypeIndex = try parseUnsigned() - let tableIndex: TableIndex = try parseUnsigned() - return (typeIndex, tableIndex) - } - - @inlinable mutating func visitReturnCallRef() throws(WasmParserError) -> UInt32 { - return 0 - } - - @inlinable mutating func visitTypedSelect() throws(WasmParserError) -> WasmTypes.ValueType { - let results = try parseVector { () throws(WasmParserError) in try parseValueType() } - guard results.count == 1 else { - throw makeError(.invalidResultArity(expected: 1, actual: results.count)) - } - return results[0] - } - - @inlinable mutating func visitLocalGet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitLocalSet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitLocalTee() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitGlobalGet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitGlobalSet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitLoad(_: Instruction.Load) throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitStore(_: Instruction.Store) throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitMemorySize() throws(WasmParserError) -> UInt32 { - try parseMemoryIndex() - } - @inlinable mutating func visitMemoryGrow() throws(WasmParserError) -> UInt32 { - try parseMemoryIndex() - } - @inlinable mutating func visitI32Const() throws(WasmParserError) -> Int32 { - let n: UInt32 = try parseInteger() - return Int32(bitPattern: n) - } - @inlinable mutating func visitI64Const() throws(WasmParserError) -> Int64 { - let n: UInt64 = try parseInteger() - return Int64(bitPattern: n) - } - @inlinable mutating func visitF32Const() throws(WasmParserError) -> IEEE754.Float32 { - let n = try parseFloat() - return IEEE754.Float32(bitPattern: n) - } - @inlinable mutating func visitF64Const() throws(WasmParserError) -> IEEE754.Float64 { - let n = try parseDouble() - return IEEE754.Float64(bitPattern: n) - } - @inlinable mutating func visitRefNull() throws(WasmParserError) -> WasmTypes.HeapType { - return try parseHeapType() - } - @inlinable mutating func visitBrOnNull() throws(WasmParserError) -> UInt32 { - return 0 - } - @inlinable mutating func visitBrOnNonNull() throws(WasmParserError) -> UInt32 { - return 0 - } - - @inlinable mutating func visitRefFunc() throws(WasmParserError) -> UInt32 { try parseUnsigned() } - @inlinable mutating func visitMemoryInit() throws(WasmParserError) -> UInt32 { - let dataIndex: DataIndex = try parseUnsigned() - _ = try parseMemoryIndex() - return dataIndex - } - - @inlinable mutating func visitDataDrop() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - - @inlinable mutating func visitMemoryCopy() throws(WasmParserError) -> (dstMem: UInt32, srcMem: UInt32) { - _ = try parseMemoryIndex() - _ = try parseMemoryIndex() - return (0, 0) - } - - @inlinable mutating func visitMemoryFill() throws(WasmParserError) -> UInt32 { - let zero = try stream.consumeAny() - guard zero == 0x00 else { - throw makeError(.zeroExpected(actual: zero)) - } - return 0 - } - - @inlinable mutating func visitTableInit() throws(WasmParserError) -> (elemIndex: UInt32, table: UInt32) { - let elementIndex: ElementIndex = try parseUnsigned() - let tableIndex: TableIndex = try parseUnsigned() - return (elementIndex, tableIndex) - } - @inlinable mutating func visitElemDrop() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitTableCopy() throws(WasmParserError) -> (dstTable: UInt32, srcTable: UInt32) { - let destination: TableIndex = try parseUnsigned() - let source: TableIndex = try parseUnsigned() - return (destination, source) - } - @inlinable mutating func visitTableFill() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitTableGet() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitTableSet() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitTableGrow() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitTableSize() throws(WasmParserError) -> UInt32 { - try parseUnsigned() - } - @inlinable mutating func visitMemoryAtomicNotify() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitMemoryAtomicWait32() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitMemoryAtomicWait64() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwAdd() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwAdd() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwSub() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwSub() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwAnd() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwAnd() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwOr() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwOr() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwXor() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwXor() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwXchg() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwXchg() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmwCmpxchg() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmwCmpxchg() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw8CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI32AtomicRmw16CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw8CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw16CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitI64AtomicRmw32CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } - @inlinable mutating func visitV128Const() throws(WasmParserError) -> V128 { - return V128(bytes: Array(try stream.consume(count: V128.byteCount))) - } - @inlinable mutating func visitI8x16Shuffle() throws(WasmParserError) -> V128ShuffleMask { - return V128ShuffleMask(lanes: Array(try stream.consume(count: V128ShuffleMask.laneCount))) - } - @inlinable mutating func visitSimdLane(_: Instruction.SimdLane) throws(WasmParserError) -> UInt8 { - return try stream.consumeAny() - } - @inlinable mutating func visitSimdMemLane(_: Instruction.SimdMemLane) throws(WasmParserError) -> (memarg: MemArg, lane: UInt8) { - let memarg = try parseMemarg() - let lane = try stream.consumeAny() - return (memarg: memarg, lane: lane) - } - @inlinable func claimNextByte() throws(WasmParserError) -> UInt8 { - return try stream.consumeAny() - } - - /// Parse a single binary instruction. - @inline(__always) - @inlinable - mutating func parseInstruction() throws(WasmParserError) -> Instruction { - return try parseBinaryInstruction(decoder: &self) - } - - @usableFromInline - mutating func parseConstExpression() throws(WasmParserError) -> ConstExpression { - var insts: [Instruction] = [] - while true { - let instruction = try self.parseInstruction() - insts.append(instruction) - if case .end = instruction { break } - } - return insts - } -} - -/// > Note: -/// -extension Parser { - /// > Note: - /// - @usableFromInline - func parseCustomSection(size: UInt32) throws(WasmParserError) -> CustomSection { - let preNameIndex = stream.currentIndex - let name = try parseName() - let nameSize = stream.currentIndex - preNameIndex - let contentSize = Int(size) - nameSize - - guard contentSize >= 0 else { - throw makeError(.invalidSectionSize(size)) - } - - let bytes = try stream.consume(count: contentSize) - - return CustomSection(name: name, bytes: bytes) - } - - /// > Note: - /// - @inlinable - func parseTypeSection() throws(WasmParserError) -> [FunctionType] { - return try parseVector { () throws(WasmParserError) in try parseFunctionType() } - } - - /// > Note: - /// - @usableFromInline - func parseImportSection() throws(WasmParserError) -> [Import] { - return try parseVector { () throws(WasmParserError) in - let module = try parseName() - let name = try parseName() - let descriptor = try parseImportDescriptor() - return Import(module: module, name: name, descriptor: descriptor) - } - } - - /// > Note: - /// - func parseImportDescriptor() throws(WasmParserError) -> ImportDescriptor { - let b = try stream.consume(Set(0x00...0x03)) - switch b { - case 0x00: return try .function(parseUnsigned()) - case 0x01: return try .table(parseTableType()) - case 0x02: return try .memory(parseMemoryType()) - case 0x03: return try .global(parseGlobalType()) - default: - preconditionFailure("should never reach here") - } - } - - /// > Note: - /// - @inlinable - func parseFunctionSection() throws(WasmParserError) -> [TypeIndex] { - return try parseVector { () throws(WasmParserError) in try parseUnsigned() } - } - - /// > Note: - /// - @usableFromInline - func parseTableSection() throws(WasmParserError) -> [Table] { - return try parseVector { () throws(WasmParserError) in try Table(type: parseTableType()) } - } - - /// > Note: - /// - @usableFromInline - func parseMemorySection() throws(WasmParserError) -> [Memory] { - return try parseVector { () throws(WasmParserError) in try Memory(type: parseLimits()) } - } - - /// > Note: - /// - @usableFromInline - mutating func parseGlobalSection() throws(WasmParserError) -> [Global] { - return try parseVector { () throws(WasmParserError) in - let type = try parseGlobalType() - let expression = try parseConstExpression() - return Global(type: type, initializer: expression) - } - } - - /// > Note: - /// - @usableFromInline - func parseExportSection() throws(WasmParserError) -> [Export] { - return try parseVector { () throws(WasmParserError) in - let name = try parseName() - let descriptor = try parseExportDescriptor() - return Export(name: name, descriptor: descriptor) - } - } - - /// > Note: - /// - func parseExportDescriptor() throws(WasmParserError) -> ExportDescriptor { - let b = try stream.consume(Set(0x00...0x03)) - switch b { - case 0x00: return try .function(parseUnsigned()) - case 0x01: return try .table(parseUnsigned()) - case 0x02: return try .memory(parseUnsigned()) - case 0x03: return try .global(parseUnsigned()) - default: - preconditionFailure("should never reach here") - } - } - - /// > Note: - /// - @usableFromInline - func parseStartSection() throws(WasmParserError) -> FunctionIndex { - return try parseUnsigned() - } - - /// > Note: - /// - @inlinable - mutating func parseElementSection() throws(WasmParserError) -> [ElementSegment] { - return try parseVector { () throws(WasmParserError) in - let flag = try ElementSegment.Flag(rawValue: parseUnsigned()) - - let type: ReferenceType - let initializer: [ConstExpression] - let mode: ElementSegment.Mode - - if flag.contains(.isPassiveOrDeclarative) { - if flag.contains(.isDeclarative) { - mode = .declarative - } else { - mode = .passive - } - } else { - let table: TableIndex - - if flag.contains(.hasTableIndex) { - table = try parseUnsigned() - } else { - table = 0 - } - - let offset = try parseConstExpression() - mode = .active(table: table, offset: offset) - } - - if flag.segmentHasRefType { - let valueType = try parseValueType() - - guard case .ref(let refType) = valueType else { - throw makeError(.expectedRefType(actual: valueType)) - } - - type = refType - } else { - type = .funcRef - } - - if flag.segmentHasElemKind { - // `elemkind` parsing as defined in the spec - let elemKind = try parseUnsigned() as UInt32 - guard elemKind == 0x00 else { - throw makeError(.unexpectedElementKind(expected: 0x00, actual: elemKind)) - } - } - - if flag.contains(.usesExpressions) { - initializer = try parseVector { () throws(WasmParserError) in try parseConstExpression() } - } else { - initializer = try parseVector { () throws(WasmParserError) in - try [Instruction.refFunc(functionIndex: parseUnsigned() as UInt32)] - } - } - - return ElementSegment(type: type, initializer: initializer, mode: mode) - } - } - - /// > Note: - /// - @inlinable - func parseCodeSection() throws(WasmParserError) -> [Code] { - return try parseVector { () throws(WasmParserError) in - let size = try parseUnsigned() as UInt32 - let bodyStart = stream.currentIndex - let localTypes = try parseVector { () throws(WasmParserError) -> (n: UInt32, type: ValueType) in - let n: UInt32 = try parseUnsigned() - let t = try parseValueType() - return (n, t) - } - let totalLocals = localTypes.reduce(UInt64(0)) { $0 + UInt64($1.n) } - guard totalLocals < limits.maxFunctionLocals else { - throw makeError(.tooManyLocals(totalLocals, limit: limits.maxFunctionLocals)) - } - - let locals = localTypes.flatMap { (n: UInt32, type: ValueType) in - return Array(repeating: type, count: Int(n)) - } - let expressionStart = stream.currentIndex - let expressionBytes = try stream.consume( - count: Int(size) - (expressionStart - bodyStart) - ) - return Code( - locals: locals, expression: expressionBytes, - offset: expressionStart, features: features - ) - } - } - - /// > Note: - /// - @inlinable - mutating func parseDataSection() throws(WasmParserError) -> [DataSegment] { - return try parseVector { () throws(WasmParserError) in - let kind: UInt32 = try parseUnsigned() - switch kind { - case 0: - let offset = try parseConstExpression() - let initializer = try parseVectorBytes() - return .active(.init(index: 0, offset: offset, initializer: initializer)) - - case 1: - return try .passive(parseVectorBytes()) - - case 2: - let index: UInt32 = try parseUnsigned() - let offset = try parseConstExpression() - let initializer = try parseVectorBytes() - return .active(.init(index: index, offset: offset, initializer: initializer)) - default: - throw makeError(.malformedDataSegmentKind(kind)) - } - } - } - - /// > Note: - /// - @usableFromInline - func parseDataCountSection() throws(WasmParserError) -> UInt32 { - return try parseUnsigned() - } -} - -public enum ParsingPayload { - case header(version: [UInt8]) - case customSection(CustomSection) - case typeSection([FunctionType]) - case importSection([Import]) - case functionSection([TypeIndex]) - case tableSection([Table]) - case memorySection([Memory]) - case globalSection([Global]) - case exportSection([Export]) - case startSection(FunctionIndex) - case elementSection([ElementSegment]) - case codeSection([Code]) - case dataSection([DataSegment]) - case dataCount(UInt32) -} - -/// > Note: -/// -extension Parser { - /// > Note: - /// - @usableFromInline - func parseMagicNumber() throws(WasmParserError) { - let magicNumber = try stream.consume(count: 4) - guard magicNumber.elementsEqual(WASM_MAGIC) else { - throw makeError(.invalidMagicNumber(.init(magicNumber))) - } - } - - /// > Note: - /// - @usableFromInline - func parseVersion() throws(WasmParserError) -> [UInt8] { - let version = try Array(stream.consume(count: 4)) - guard version == [0x01, 0x00, 0x00, 0x00] else { - throw makeError(.unknownVersion(.init(version))) - } - return version - } - - @usableFromInline - struct OrderTracking { - @usableFromInline - enum Order: UInt8 { - case initial = 0 - case type - case _import - case function - case table - case memory - case tag - case global - case export - case start - case element - case dataCount - case code - case data - } - - @usableFromInline - var last: Order = .initial - - @inlinable - mutating func track(order: Order, parser: Parser) throws(WasmParserError) { - guard last.rawValue < order.rawValue else { - throw parser.makeError(.sectionOutOfOrder) - } - last = order - } - } - - /// Attempts to parse a chunk of the Wasm binary stream. - /// - /// - Returns: A `ParsingPayload` if the parsing was successful, otherwise `nil`. - /// - /// > Note: - /// - /// - /// The following example demonstrates how to use the `Parser` to parse a Wasm binary stream: - /// - /// ```swift - /// import WasmParser - /// - /// var parser = Parser(bytes: [ - /// 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, - /// 0x01, 0x7e, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x07, 0x01, 0x03, - /// 0x66, 0x61, 0x63, 0x00, 0x00, 0x0a, 0x17, 0x01, 0x15, 0x00, 0x20, 0x00, - /// 0x50, 0x04, 0x7e, 0x42, 0x01, 0x05, 0x20, 0x00, 0x20, 0x00, 0x42, 0x01, - /// 0x7d, 0x10, 0x00, 0x7e, 0x0b, 0x0b - /// ]) - /// - /// while let payload = try parser.parseNext() { - /// switch payload { - /// case .header(let version): - /// print("Wasm version: \(version)") - /// default: break - /// } - /// } - /// ``` - @inlinable - public mutating func parseNext() throws(WasmParserError) -> ParsingPayload? { - switch nextParseTarget { - case .header: - try parseMagicNumber() - let version = try parseVersion() - self.nextParseTarget = .section - return .header(version: version) - case .section: - guard try !stream.hasReachedEnd() else { - return nil - } - let sectionID = try stream.consumeAny() - let sectionSize: UInt32 = try parseUnsigned() - let sectionStart = stream.currentIndex - - let payload: ParsingPayload - let order: OrderTracking.Order? - switch sectionID { - case 0: - order = nil - payload = .customSection(try parseCustomSection(size: sectionSize)) - case 1: - order = .type - payload = .typeSection(try parseTypeSection()) - case 2: - order = ._import - payload = .importSection(try parseImportSection()) - case 3: - order = .function - payload = .functionSection(try parseFunctionSection()) - case 4: - order = .table - payload = .tableSection(try parseTableSection()) - case 5: - order = .memory - payload = .memorySection(try parseMemorySection()) - case 6: - order = .global - payload = .globalSection(try parseGlobalSection()) - case 7: - order = .export - payload = .exportSection(try parseExportSection()) - case 8: - order = .start - payload = .startSection(try parseStartSection()) - case 9: - order = .element - payload = .elementSection(try parseElementSection()) - case 10: - order = .code - payload = .codeSection(try parseCodeSection()) - case 11: - order = .data - payload = .dataSection(try parseDataSection()) - case 12: - order = .dataCount - payload = .dataCount(try parseDataCountSection()) - default: - throw makeError(.malformedSectionID(sectionID)) - } - if let order = order { - try orderTracking.track(order: order, parser: self) - } - let expectedSectionEnd = sectionStart + Int(sectionSize) - guard expectedSectionEnd == stream.currentIndex else { - throw makeError( - .sectionSizeMismatch( - sectionID: sectionID, - expected: expectedSectionEnd, - actual: offset - ) - ) - } - return payload - } - } -} - -/// A map of names by its index. -public typealias NameMap = [UInt32: String] - -/// Parsed names from a name section subsection. -public enum ParsedNames { - /// Subsection 0: Module name. - case moduleName(String) - /// Subsection 1: Function names. - case functions(NameMap) - /// Subsection 2: Local names (funcIndex → [localIndex → name]). - case locals([UInt32: NameMap]) - /// Subsection 3: Label names (funcIndex → [labelIndex → name]). - case labels([UInt32: NameMap]) - /// Subsection 4: Type names. - case types(NameMap) - /// Subsection 5: Table names. - case tables(NameMap) - /// Subsection 6: Memory names. - case memories(NameMap) - /// Subsection 7: Global names. - case globals(NameMap) - /// Subsection 8: Element segment names. - case elements(NameMap) - /// Subsection 9: Data segment names. - case dataSegments(NameMap) -} - -/// A parser for the name custom section. -/// -/// > Note: -public struct NameSectionParser { - let stream: Stream - - public init(stream: Stream) { - self.stream = stream - } - - /// Parses the entire name section. - /// - /// - Throws: If the stream is malformed or the section is invalid. - /// - Returns: A list of parsed names. - public func parseAll() throws(WasmParserError) -> [ParsedNames] { - var results: [ParsedNames] = [] - while try !stream.hasReachedEnd() { - let id = try stream.consumeAny() - guard let result = try parseNameSubsection(type: id) else { - continue - } - results.append(result) - } - return results - } - - func parseNameSubsection(type: UInt8) throws(WasmParserError) -> ParsedNames? { - let size = try stream.parseUnsigned(UInt32.self) - switch type { - case 0: return .moduleName(try stream.parseName()) - case 1: return .functions(try parseNameMap()) - case 2: return .locals(try parseIndirectNameMap()) - case 3: return .labels(try parseIndirectNameMap()) - case 4: return .types(try parseNameMap()) - case 5: return .tables(try parseNameMap()) - case 6: return .memories(try parseNameMap()) - case 7: return .globals(try parseNameMap()) - case 8: return .elements(try parseNameMap()) - case 9: return .dataSegments(try parseNameMap()) - default: - _ = try stream.consume(count: Int(size)) - return nil - } - } - - func parseNameMap() throws(WasmParserError) -> NameMap { - var nameMap: NameMap = [:] - _ = try stream.parseVector { () throws(WasmParserError) in - let index = try stream.parseUnsigned(UInt32.self) - let name = try stream.parseName() - nameMap[index] = name - } - return nameMap - } - - func parseIndirectNameMap() throws(WasmParserError) -> [UInt32: NameMap] { - var map: [UInt32: NameMap] = [:] - _ = try stream.parseVector { () throws(WasmParserError) in - let outerIndex = try stream.parseUnsigned(UInt32.self) - map[outerIndex] = try parseNameMap() - } - return map - } -} - -// MARK: - File Type Detection - -/// The type of a WebAssembly binary file. -public enum WasmFileType: Equatable, Sendable { - /// A core WebAssembly module (version 1) - case coreModule - /// A WebAssembly component (version 0x0d, layer 1) - case component - /// Unknown or invalid WebAssembly file - case unknown -} - /// Detect the type of a WebAssembly binary file by reading its header. /// /// This function reads the 8-byte WebAssembly header to determine whether diff --git a/Sources/WasmParser/BinaryInstructionDecoder.swift b/Sources/WasmParserCore/BinaryInstructionDecoder.swift similarity index 100% rename from Sources/WasmParser/BinaryInstructionDecoder.swift rename to Sources/WasmParserCore/BinaryInstructionDecoder.swift diff --git a/Sources/WasmParserCore/CMakeLists.txt b/Sources/WasmParserCore/CMakeLists.txt new file mode 100644 index 00000000..025c6842 --- /dev/null +++ b/Sources/WasmParserCore/CMakeLists.txt @@ -0,0 +1,14 @@ +add_wasmkit_library(WasmParserCore + Stream/ByteStream.swift + BinaryInstructionDecoder.swift + ComponentParser.swift + InstructionVisitor.swift + LEB.swift + ParsingLimits.swift + WasmParserError.swift + WasmParserCore.swift + WasmTypes.swift +) + +target_link_wasmkit_libraries(WasmParserCore PUBLIC + WasmTypes SystemPackage) diff --git a/Sources/WasmParser/ComponentParser.swift b/Sources/WasmParserCore/ComponentParser.swift similarity index 100% rename from Sources/WasmParser/ComponentParser.swift rename to Sources/WasmParserCore/ComponentParser.swift diff --git a/Sources/WasmParser/InstructionVisitor.swift b/Sources/WasmParserCore/InstructionVisitor.swift similarity index 100% rename from Sources/WasmParser/InstructionVisitor.swift rename to Sources/WasmParserCore/InstructionVisitor.swift diff --git a/Sources/WasmParser/LEB.swift b/Sources/WasmParserCore/LEB.swift similarity index 100% rename from Sources/WasmParser/LEB.swift rename to Sources/WasmParserCore/LEB.swift diff --git a/Sources/WasmParser/ParsingLimits.swift b/Sources/WasmParserCore/ParsingLimits.swift similarity index 100% rename from Sources/WasmParser/ParsingLimits.swift rename to Sources/WasmParserCore/ParsingLimits.swift diff --git a/Sources/WasmParser/Stream/ByteStream.swift b/Sources/WasmParserCore/Stream/ByteStream.swift similarity index 100% rename from Sources/WasmParser/Stream/ByteStream.swift rename to Sources/WasmParserCore/Stream/ByteStream.swift diff --git a/Sources/WasmParserCore/WasmParserCore.swift b/Sources/WasmParserCore/WasmParserCore.swift new file mode 100644 index 00000000..10d660a2 --- /dev/null +++ b/Sources/WasmParserCore/WasmParserCore.swift @@ -0,0 +1,1237 @@ +import WasmTypes + +/// A streaming parser for WebAssembly binary format. +/// +/// The parser is designed to be used to incrementally parse a WebAssembly binary bytestream. +public struct Parser { + @usableFromInline + let stream: Stream + @usableFromInline let limits: ParsingLimits + @usableFromInline var orderTracking = OrderTracking() + + @usableFromInline + enum NextParseTarget { + case header + case section + } + @usableFromInline + var nextParseTarget: NextParseTarget + + public let features: WasmFeatureSet + public var offset: Int { + return stream.currentIndex + } + + public init(stream: Stream, features: WasmFeatureSet = .default) { + self.stream = stream + self.features = features + self.nextParseTarget = .header + self.limits = .default + } + + @usableFromInline + internal func makeError(_ message: WasmParserError.Message) -> WasmParserError { + return WasmParserError(message: message, offset: offset) + } +} + +extension Parser where Stream == StaticByteStream { + + /// Initialize a new parser with the given bytes + /// + /// - Parameters: + /// - bytes: The bytes of the WebAssembly binary file to parse + /// - features: Enabled WebAssembly features for parsing + public init(bytes: [UInt8], features: WasmFeatureSet = .default) { + self.init(stream: StaticByteStream(bytes: bytes), features: features) + } +} + +@_documentation(visibility: internal) +public struct ExpressionParser { + /// The byte offset of the code in the module + let codeOffset: Int + /// The initial byte offset of the code buffer stream + /// NOTE: This might be different from `codeOffset` if the code buffer + /// is not a part of the initial `FileHandleStream` buffer + let initialStreamOffset: Int + @usableFromInline + var parser: Parser + + /// Whether the final `end` opcode has been returned. We track this explicitly + /// rather than checking `hasReachedEnd()` upfront because an exhausted stream + /// without a preceding `end` opcode is a validation error, not a normal exit. + @usableFromInline + var reachedEnd: Bool + + public var offset: Int { + self.codeOffset + self.parser.offset - self.initialStreamOffset + } + + public init(code: Code) { + self.parser = Parser( + stream: StaticByteStream(bytes: code.expression), + features: code.features + ) + self.codeOffset = code.offset + self.initialStreamOffset = self.parser.offset + self.reachedEnd = false + } + + /// Parse the next instruction. Returns nil when expression is complete (end opcode reached at top level). + @inlinable + public mutating func parse() throws(WasmParserError) -> Visit? { + if reachedEnd { return nil } + let instructionOffset = offset + let instruction = try parser.parseInstruction() + if case .end = instruction, try parser.stream.hasReachedEnd() { + reachedEnd = true + } + return Visit(instruction: instruction, offset: instructionOffset) + } + + /// A parsed instruction ready to be dispatched to a visitor. + public struct Visit { + @usableFromInline + let instruction: Instruction + @usableFromInline + let offset: Int + + @usableFromInline + init(instruction: Instruction, offset: Int) { + self.instruction = instruction + self.offset = offset + } + + @inlinable + public func callAsFunction( + visitor: inout V + ) throws(V.VisitorError) { + visitor.binaryOffset = offset + try dispatchInstruction(instruction, to: &visitor) + } + } +} + +package let WASM_MAGIC: [UInt8] = [0x00, 0x61, 0x73, 0x6D] + +/// Flags for enabling/disabling WebAssembly features +public struct WasmFeatureSet: OptionSet, Sendable { + /// The raw value of the feature set + public let rawValue: Int + + /// Initialize a new feature set with the given raw value + public init(rawValue: Int) { + self.rawValue = rawValue + } + + /// The WebAssembly memory64 proposal + @_alwaysEmitIntoClient + public static var memory64: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 0) } + /// The WebAssembly reference types proposal + @_alwaysEmitIntoClient + public static var referenceTypes: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 1) } + /// The WebAssembly threads proposal + @_alwaysEmitIntoClient + public static var threads: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 2) } + /// The WebAssembly tail-call proposal + @_alwaysEmitIntoClient + public static var tailCall: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 3) } + /// The WebAssembly SIMD proposal + @_alwaysEmitIntoClient + public static var simd: WasmFeatureSet { WasmFeatureSet(rawValue: 1 << 4) } + + /// The default feature set + public static let `default`: WasmFeatureSet = [.referenceTypes] + /// The feature set with all features enabled + public static let all: WasmFeatureSet = [.memory64, .referenceTypes, .threads, .tailCall, .simd] +} + +/// > Note: +/// +extension ByteStream { + @inlinable + func parseVector(content parser: () throws(WasmParserError) -> Content) throws(WasmParserError) -> [Content] { + var contents = [Content]() + let count: UInt32 = try parseUnsigned() + for _ in 0.. Note: +/// +extension ByteStream { + @inlinable + func parseUnsigned(_: T.Type = T.self) throws(WasmParserError) -> T { + try decodeLEB128(stream: self) + } + + @inlinable + func parseSigned() throws(WasmParserError) -> T { + try decodeLEB128(stream: self) + } + + @usableFromInline + func parseVarSigned33() throws(WasmParserError) -> Int64 { + try decodeLEB128(stream: self, bitWidth: 33) + } +} + +/// > Note: +/// +extension ByteStream { + package func parseName() throws(WasmParserError) -> String { + let bytes = try parseVector { () throws(WasmParserError) -> UInt8 in + try consumeAny() + } + + // TODO(optimize): Utilize ASCII fast path in UTF8 decoder + var name = "" + + var iterator = bytes.makeIterator() + var decoder = UTF8() + Decode: while true { + switch decoder.decode(&iterator) { + case .scalarValue(let scalar): name.append(Character(scalar)) + case .emptyInput: break Decode + case .error: throw WasmParserError(message: .invalidUTF8(bytes), offset: currentIndex) + } + } + + return name + } +} + +extension Parser { + @inlinable + func parseVector(content parser: () throws(WasmParserError) -> Content) throws(WasmParserError) -> [Content] { + try stream.parseVector(content: parser) + } + + @inline(__always) + @inlinable + func parseUnsigned(_: T.Type = T.self) throws(WasmParserError) -> T { + try stream.parseUnsigned(T.self) + } + + @inlinable + func parseInteger() throws(WasmParserError) -> T { + let signed: T.Signed = try stream.parseSigned() + return T(bitPattern: signed) + } + + func parseName() throws(WasmParserError) -> String { + try stream.parseName() + } +} + +/// > Note: +/// +extension Parser { + @usableFromInline + func parseFloat() throws(WasmParserError) -> UInt32 { + let consumedLittleEndian = try stream.consume(count: 4).reversed() + let bitPattern = consumedLittleEndian.reduce(UInt32(0)) { acc, byte in + acc << 8 + UInt32(byte) + } + return bitPattern + } + + @usableFromInline + func parseDouble() throws(WasmParserError) -> UInt64 { + let consumedLittleEndian = try stream.consume(count: 8).reversed() + let bitPattern = consumedLittleEndian.reduce(UInt64(0)) { acc, byte in + acc << 8 + UInt64(byte) + } + return bitPattern + } +} + +/// > Note: +/// +extension Parser { + /// > Note: + /// + @usableFromInline + func parseValueType() throws(WasmParserError) -> ValueType { + let b = try stream.consumeAny() + + switch b { + case 0x7F: return .i32 + case 0x7E: return .i64 + case 0x7D: return .f32 + case 0x7C: return .f64 + case 0x7B: return .v128 + default: + guard let refType = try parseReferenceType(byte: b) else { + throw makeError(.malformedValueType(b)) + } + return .ref(refType) + } + } + + /// - Returns: `nil` if the given `byte` discriminator is malformed + /// > Note: + /// + @usableFromInline + func parseReferenceType(byte: UInt8) throws(WasmParserError) -> ReferenceType? { + switch byte { + case 0x63: return try ReferenceType(isNullable: true, heapType: parseHeapType()) + case 0x64: return try ReferenceType(isNullable: false, heapType: parseHeapType()) + case 0x6F: return .externRef + case 0x70: return .funcRef + default: return nil // invalid discriminator + } + } + + /// > Note: + /// + @usableFromInline + func parseHeapType() throws(WasmParserError) -> HeapType { + let b = try stream.peek() + switch b { + case 0x6F: + _ = try stream.consumeAny() + return .externRef + case 0x70: + _ = try stream.consumeAny() + return .funcRef + default: + let rawIndex = try stream.parseVarSigned33() + guard let index = TypeIndex(exactly: rawIndex) else { + throw makeError(.invalidFunctionType(rawIndex)) + } + return .concrete(typeIndex: index) + } + } + + /// > Note: + /// + @inlinable + func parseResultType() throws(WasmParserError) -> BlockType { + guard let nextByte = try stream.peek() else { + throw makeError(.unexpectedEnd) + } + switch nextByte { + case 0x40: + _ = try stream.consumeAny() + return .empty + case 0x7B...0x7F, 0x70, 0x6F: + return try .type(parseValueType()) + default: + let rawIndex = try stream.parseVarSigned33() + guard let index = TypeIndex(exactly: rawIndex) else { + throw makeError(.invalidFunctionType(rawIndex)) + } + return .funcType(index) + } + } + + /// > Note: + /// + @inlinable + func parseFunctionType() throws(WasmParserError) -> FunctionType { + let opcode = try stream.consumeAny() + + // XXX: spectest expects the first byte should be parsed as a LEB128 with 1 byte limit + // but the spec itself doesn't require it, so just check the continue bit of LEB128 here. + guard opcode & 0b10000000 == 0 else { + throw makeError(.integerRepresentationTooLong) + } + guard opcode == 0x60 else { + throw makeError(.malformedFunctionType(opcode)) + } + + let parameters = try parseVector { () throws(WasmParserError) in try parseValueType() } + let results = try parseVector { () throws(WasmParserError) in try parseValueType() } + return FunctionType(parameters: parameters, results: results) + } + + /// > Note: + /// + @usableFromInline + func parseLimits() throws(WasmParserError) -> Limits { + let b = try stream.consumeAny() + let sharedMask: UInt8 = 0b0010 + let isMemory64Mask: UInt8 = 0b0100 + + let hasMax = b & 0b0001 != 0 + let shared = b & sharedMask != 0 + let isMemory64 = b & isMemory64Mask != 0 + + var flagMask: UInt8 = 0b0001 + if features.contains(.threads) { + flagMask |= sharedMask + } + if features.contains(.memory64) { + flagMask |= isMemory64Mask + } + guard (b & ~flagMask) == 0 else { + throw makeError(.malformedLimit(b)) + } + + let min: UInt64 + if isMemory64 { + min = try parseUnsigned(UInt64.self) + } else { + min = try UInt64(parseUnsigned(UInt32.self)) + } + var max: UInt64? + if hasMax { + if isMemory64 { + max = try parseUnsigned(UInt64.self) + } else { + max = try UInt64(parseUnsigned(UInt32.self)) + } + } + return Limits(min: min, max: max, isMemory64: isMemory64, shared: shared) + } + + /// > Note: + /// + func parseMemoryType() throws(WasmParserError) -> MemoryType { + return try parseLimits() + } + + /// > Note: + /// + @inlinable + func parseTableType() throws(WasmParserError) -> TableType { + let elementType: ReferenceType + let b = try stream.consumeAny() + + switch b { + case 0x70: + elementType = .funcRef + case 0x6F: + elementType = .externRef + default: + throw WasmParserError( + kind: .parserUnexpectedByte(b, expected: [0x6F, 0x70]), + offset: stream.currentIndex + ) + } + + let limits = try parseLimits() + return TableType(elementType: elementType, limits: limits) + } + + /// > Note: + /// + @inlinable + func parseGlobalType() throws(WasmParserError) -> GlobalType { + let valueType = try parseValueType() + let mutability = try parseMutability() + return GlobalType(mutability: mutability, valueType: valueType) + } + + @inlinable + func parseMutability() throws(WasmParserError) -> Mutability { + let b = try stream.consumeAny() + switch b { + case 0x00: + return .constant + case 0x01: + return .variable + default: + throw makeError(.malformedMutability(b)) + } + } + + /// > Note: + /// + @inlinable + func parseMemarg() throws(WasmParserError) -> MemArg { + let align: UInt32 = try parseUnsigned() + let offset: UInt64 = try features.contains(.memory64) ? parseUnsigned(UInt64.self) : UInt64(parseUnsigned(UInt32.self)) + return MemArg(offset: offset, align: align) + } + + @inlinable func parseVectorBytes() throws(WasmParserError) -> ArraySlice { + let count: UInt32 = try parseUnsigned() + return try stream.consume(count: Int(count)) + } +} + +/// > Note: +/// +extension Parser: BinaryInstructionDecoder { + @inlinable func parseMemoryIndex() throws(WasmParserError) -> UInt32 { + let zero = try stream.consumeAny() + guard zero == 0x00 else { + throw makeError(.zeroExpected(actual: zero)) + } + return 0 + } + + @inlinable func throwUnknown(_ opcode: [UInt8]) throws(WasmParserError) -> Never { + throw makeError(.illegalOpcode(opcode)) + } + + @inlinable func visitUnknown(_ opcode: [UInt8]) throws(WasmParserError) -> Bool { + try throwUnknown(opcode) + } + + @inlinable mutating func visitBlock() throws(WasmParserError) -> BlockType { try parseResultType() } + @inlinable mutating func visitLoop() throws(WasmParserError) -> BlockType { try parseResultType() } + @inlinable mutating func visitIf() throws(WasmParserError) -> BlockType { try parseResultType() } + @inlinable mutating func visitBr() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitBrIf() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitBrTable() throws(WasmParserError) -> BrTable { + let labelIndices: [UInt32] = try parseVector { () throws(WasmParserError) in try parseUnsigned() } + let labelIndex: UInt32 = try parseUnsigned() + return BrTable(labelIndices: labelIndices, defaultIndex: labelIndex) + } + @inlinable mutating func visitCall() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitCallRef() throws(WasmParserError) -> UInt32 { + // TODO reference types checks + // traps on nil + try parseUnsigned() + } + + @inlinable mutating func visitCallIndirect() throws(WasmParserError) -> (typeIndex: UInt32, tableIndex: UInt32) { + let typeIndex: TypeIndex = try parseUnsigned() + let peek = try stream.peek() + + if !features.contains(.referenceTypes) && peek != 0 { + // Check that reserved byte is zero when reference-types is disabled + throw makeError(.malformedIndirectCall) + } + let tableIndex: TableIndex = try parseUnsigned() + return (typeIndex, tableIndex) + } + + @inlinable mutating func visitReturnCall() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + + @inlinable mutating func visitReturnCallIndirect() throws(WasmParserError) -> (typeIndex: UInt32, tableIndex: UInt32) { + let typeIndex: TypeIndex = try parseUnsigned() + let tableIndex: TableIndex = try parseUnsigned() + return (typeIndex, tableIndex) + } + + @inlinable mutating func visitReturnCallRef() throws(WasmParserError) -> UInt32 { + return 0 + } + + @inlinable mutating func visitTypedSelect() throws(WasmParserError) -> WasmTypes.ValueType { + let results = try parseVector { () throws(WasmParserError) in try parseValueType() } + guard results.count == 1 else { + throw makeError(.invalidResultArity(expected: 1, actual: results.count)) + } + return results[0] + } + + @inlinable mutating func visitLocalGet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitLocalSet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitLocalTee() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitGlobalGet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitGlobalSet() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitLoad(_: Instruction.Load) throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitStore(_: Instruction.Store) throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitMemorySize() throws(WasmParserError) -> UInt32 { + try parseMemoryIndex() + } + @inlinable mutating func visitMemoryGrow() throws(WasmParserError) -> UInt32 { + try parseMemoryIndex() + } + @inlinable mutating func visitI32Const() throws(WasmParserError) -> Int32 { + let n: UInt32 = try parseInteger() + return Int32(bitPattern: n) + } + @inlinable mutating func visitI64Const() throws(WasmParserError) -> Int64 { + let n: UInt64 = try parseInteger() + return Int64(bitPattern: n) + } + @inlinable mutating func visitF32Const() throws(WasmParserError) -> IEEE754.Float32 { + let n = try parseFloat() + return IEEE754.Float32(bitPattern: n) + } + @inlinable mutating func visitF64Const() throws(WasmParserError) -> IEEE754.Float64 { + let n = try parseDouble() + return IEEE754.Float64(bitPattern: n) + } + @inlinable mutating func visitRefNull() throws(WasmParserError) -> WasmTypes.HeapType { + return try parseHeapType() + } + @inlinable mutating func visitBrOnNull() throws(WasmParserError) -> UInt32 { + return 0 + } + @inlinable mutating func visitBrOnNonNull() throws(WasmParserError) -> UInt32 { + return 0 + } + + @inlinable mutating func visitRefFunc() throws(WasmParserError) -> UInt32 { try parseUnsigned() } + @inlinable mutating func visitMemoryInit() throws(WasmParserError) -> UInt32 { + let dataIndex: DataIndex = try parseUnsigned() + _ = try parseMemoryIndex() + return dataIndex + } + + @inlinable mutating func visitDataDrop() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + + @inlinable mutating func visitMemoryCopy() throws(WasmParserError) -> (dstMem: UInt32, srcMem: UInt32) { + _ = try parseMemoryIndex() + _ = try parseMemoryIndex() + return (0, 0) + } + + @inlinable mutating func visitMemoryFill() throws(WasmParserError) -> UInt32 { + let zero = try stream.consumeAny() + guard zero == 0x00 else { + throw makeError(.zeroExpected(actual: zero)) + } + return 0 + } + + @inlinable mutating func visitTableInit() throws(WasmParserError) -> (elemIndex: UInt32, table: UInt32) { + let elementIndex: ElementIndex = try parseUnsigned() + let tableIndex: TableIndex = try parseUnsigned() + return (elementIndex, tableIndex) + } + @inlinable mutating func visitElemDrop() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitTableCopy() throws(WasmParserError) -> (dstTable: UInt32, srcTable: UInt32) { + let destination: TableIndex = try parseUnsigned() + let source: TableIndex = try parseUnsigned() + return (destination, source) + } + @inlinable mutating func visitTableFill() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitTableGet() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitTableSet() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitTableGrow() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitTableSize() throws(WasmParserError) -> UInt32 { + try parseUnsigned() + } + @inlinable mutating func visitMemoryAtomicNotify() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitMemoryAtomicWait32() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitMemoryAtomicWait64() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwAdd() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwAdd() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32AddU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwSub() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwSub() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32SubU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwAnd() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwAnd() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32AndU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwOr() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwOr() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32OrU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwXor() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwXor() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32XorU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwXchg() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwXchg() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32XchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmwCmpxchg() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmwCmpxchg() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw8CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI32AtomicRmw16CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw8CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw16CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitI64AtomicRmw32CmpxchgU() throws(WasmParserError) -> MemArg { try parseMemarg() } + @inlinable mutating func visitV128Const() throws(WasmParserError) -> V128 { + return V128(bytes: Array(try stream.consume(count: V128.byteCount))) + } + @inlinable mutating func visitI8x16Shuffle() throws(WasmParserError) -> V128ShuffleMask { + return V128ShuffleMask(lanes: Array(try stream.consume(count: V128ShuffleMask.laneCount))) + } + @inlinable mutating func visitSimdLane(_: Instruction.SimdLane) throws(WasmParserError) -> UInt8 { + return try stream.consumeAny() + } + @inlinable mutating func visitSimdMemLane(_: Instruction.SimdMemLane) throws(WasmParserError) -> (memarg: MemArg, lane: UInt8) { + let memarg = try parseMemarg() + let lane = try stream.consumeAny() + return (memarg: memarg, lane: lane) + } + @inlinable func claimNextByte() throws(WasmParserError) -> UInt8 { + return try stream.consumeAny() + } + + /// Parse a single binary instruction. + @inline(__always) + @inlinable + mutating func parseInstruction() throws(WasmParserError) -> Instruction { + return try parseBinaryInstruction(decoder: &self) + } + + @usableFromInline + mutating func parseConstExpression() throws(WasmParserError) -> ConstExpression { + var insts: [Instruction] = [] + while true { + let instruction = try self.parseInstruction() + insts.append(instruction) + if case .end = instruction { break } + } + return insts + } +} + +/// > Note: +/// +extension Parser { + /// > Note: + /// + @usableFromInline + func parseCustomSection(size: UInt32) throws(WasmParserError) -> CustomSection { + let preNameIndex = stream.currentIndex + let name = try parseName() + let nameSize = stream.currentIndex - preNameIndex + let contentSize = Int(size) - nameSize + + guard contentSize >= 0 else { + throw makeError(.invalidSectionSize(size)) + } + + let bytes = try stream.consume(count: contentSize) + + return CustomSection(name: name, bytes: bytes) + } + + /// > Note: + /// + @inlinable + func parseTypeSection() throws(WasmParserError) -> [FunctionType] { + return try parseVector { () throws(WasmParserError) in try parseFunctionType() } + } + + /// > Note: + /// + @usableFromInline + func parseImportSection() throws(WasmParserError) -> [Import] { + return try parseVector { () throws(WasmParserError) in + let module = try parseName() + let name = try parseName() + let descriptor = try parseImportDescriptor() + return Import(module: module, name: name, descriptor: descriptor) + } + } + + /// > Note: + /// + func parseImportDescriptor() throws(WasmParserError) -> ImportDescriptor { + let b = try stream.consume(Set(0x00...0x03)) + switch b { + case 0x00: return try .function(parseUnsigned()) + case 0x01: return try .table(parseTableType()) + case 0x02: return try .memory(parseMemoryType()) + case 0x03: return try .global(parseGlobalType()) + default: + preconditionFailure("should never reach here") + } + } + + /// > Note: + /// + @inlinable + func parseFunctionSection() throws(WasmParserError) -> [TypeIndex] { + return try parseVector { () throws(WasmParserError) in try parseUnsigned() } + } + + /// > Note: + /// + @usableFromInline + func parseTableSection() throws(WasmParserError) -> [Table] { + return try parseVector { () throws(WasmParserError) in try Table(type: parseTableType()) } + } + + /// > Note: + /// + @usableFromInline + func parseMemorySection() throws(WasmParserError) -> [Memory] { + return try parseVector { () throws(WasmParserError) in try Memory(type: parseLimits()) } + } + + /// > Note: + /// + @usableFromInline + mutating func parseGlobalSection() throws(WasmParserError) -> [Global] { + return try parseVector { () throws(WasmParserError) in + let type = try parseGlobalType() + let expression = try parseConstExpression() + return Global(type: type, initializer: expression) + } + } + + /// > Note: + /// + @usableFromInline + func parseExportSection() throws(WasmParserError) -> [Export] { + return try parseVector { () throws(WasmParserError) in + let name = try parseName() + let descriptor = try parseExportDescriptor() + return Export(name: name, descriptor: descriptor) + } + } + + /// > Note: + /// + func parseExportDescriptor() throws(WasmParserError) -> ExportDescriptor { + let b = try stream.consume(Set(0x00...0x03)) + switch b { + case 0x00: return try .function(parseUnsigned()) + case 0x01: return try .table(parseUnsigned()) + case 0x02: return try .memory(parseUnsigned()) + case 0x03: return try .global(parseUnsigned()) + default: + preconditionFailure("should never reach here") + } + } + + /// > Note: + /// + @usableFromInline + func parseStartSection() throws(WasmParserError) -> FunctionIndex { + return try parseUnsigned() + } + + /// > Note: + /// + @inlinable + mutating func parseElementSection() throws(WasmParserError) -> [ElementSegment] { + return try parseVector { () throws(WasmParserError) in + let flag = try ElementSegment.Flag(rawValue: parseUnsigned()) + + let type: ReferenceType + let initializer: [ConstExpression] + let mode: ElementSegment.Mode + + if flag.contains(.isPassiveOrDeclarative) { + if flag.contains(.isDeclarative) { + mode = .declarative + } else { + mode = .passive + } + } else { + let table: TableIndex + + if flag.contains(.hasTableIndex) { + table = try parseUnsigned() + } else { + table = 0 + } + + let offset = try parseConstExpression() + mode = .active(table: table, offset: offset) + } + + if flag.segmentHasRefType { + let valueType = try parseValueType() + + guard case .ref(let refType) = valueType else { + throw makeError(.expectedRefType(actual: valueType)) + } + + type = refType + } else { + type = .funcRef + } + + if flag.segmentHasElemKind { + // `elemkind` parsing as defined in the spec + let elemKind = try parseUnsigned() as UInt32 + guard elemKind == 0x00 else { + throw makeError(.unexpectedElementKind(expected: 0x00, actual: elemKind)) + } + } + + if flag.contains(.usesExpressions) { + initializer = try parseVector { () throws(WasmParserError) in try parseConstExpression() } + } else { + initializer = try parseVector { () throws(WasmParserError) in + try [Instruction.refFunc(functionIndex: parseUnsigned() as UInt32)] + } + } + + return ElementSegment(type: type, initializer: initializer, mode: mode) + } + } + + /// > Note: + /// + @inlinable + func parseCodeSection() throws(WasmParserError) -> [Code] { + return try parseVector { () throws(WasmParserError) in + let size = try parseUnsigned() as UInt32 + let bodyStart = stream.currentIndex + let localTypes = try parseVector { () throws(WasmParserError) -> (n: UInt32, type: ValueType) in + let n: UInt32 = try parseUnsigned() + let t = try parseValueType() + return (n, t) + } + let totalLocals = localTypes.reduce(UInt64(0)) { $0 + UInt64($1.n) } + guard totalLocals < limits.maxFunctionLocals else { + throw makeError(.tooManyLocals(totalLocals, limit: limits.maxFunctionLocals)) + } + + let locals = localTypes.flatMap { (n: UInt32, type: ValueType) in + return Array(repeating: type, count: Int(n)) + } + let expressionStart = stream.currentIndex + let expressionBytes = try stream.consume( + count: Int(size) - (expressionStart - bodyStart) + ) + return Code( + locals: locals, expression: expressionBytes, + offset: expressionStart, features: features + ) + } + } + + /// > Note: + /// + @inlinable + mutating func parseDataSection() throws(WasmParserError) -> [DataSegment] { + return try parseVector { () throws(WasmParserError) in + let kind: UInt32 = try parseUnsigned() + switch kind { + case 0: + let offset = try parseConstExpression() + let initializer = try parseVectorBytes() + return .active(.init(index: 0, offset: offset, initializer: initializer)) + + case 1: + return try .passive(parseVectorBytes()) + + case 2: + let index: UInt32 = try parseUnsigned() + let offset = try parseConstExpression() + let initializer = try parseVectorBytes() + return .active(.init(index: index, offset: offset, initializer: initializer)) + default: + throw makeError(.malformedDataSegmentKind(kind)) + } + } + } + + /// > Note: + /// + @usableFromInline + func parseDataCountSection() throws(WasmParserError) -> UInt32 { + return try parseUnsigned() + } +} + +public enum ParsingPayload { + case header(version: [UInt8]) + case customSection(CustomSection) + case typeSection([FunctionType]) + case importSection([Import]) + case functionSection([TypeIndex]) + case tableSection([Table]) + case memorySection([Memory]) + case globalSection([Global]) + case exportSection([Export]) + case startSection(FunctionIndex) + case elementSection([ElementSegment]) + case codeSection([Code]) + case dataSection([DataSegment]) + case dataCount(UInt32) +} + +/// > Note: +/// +extension Parser { + /// > Note: + /// + @usableFromInline + func parseMagicNumber() throws(WasmParserError) { + let magicNumber = try stream.consume(count: 4) + guard magicNumber.elementsEqual(WASM_MAGIC) else { + throw makeError(.invalidMagicNumber(.init(magicNumber))) + } + } + + /// > Note: + /// + @usableFromInline + func parseVersion() throws(WasmParserError) -> [UInt8] { + let version = try Array(stream.consume(count: 4)) + guard version == [0x01, 0x00, 0x00, 0x00] else { + throw makeError(.unknownVersion(.init(version))) + } + return version + } + + @usableFromInline + struct OrderTracking { + @usableFromInline + enum Order: UInt8 { + case initial = 0 + case type + case _import + case function + case table + case memory + case tag + case global + case export + case start + case element + case dataCount + case code + case data + } + + @usableFromInline + var last: Order = .initial + + @inlinable + mutating func track(order: Order, parser: Parser) throws(WasmParserError) { + guard last.rawValue < order.rawValue else { + throw parser.makeError(.sectionOutOfOrder) + } + last = order + } + } + + /// Attempts to parse a chunk of the Wasm binary stream. + /// + /// - Returns: A `ParsingPayload` if the parsing was successful, otherwise `nil`. + /// + /// > Note: + /// + /// + /// The following example demonstrates how to use the `Parser` to parse a Wasm binary stream: + /// + /// ```swift + /// import WasmParser + /// + /// var parser = Parser(bytes: [ + /// 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, + /// 0x01, 0x7e, 0x01, 0x7e, 0x03, 0x02, 0x01, 0x00, 0x07, 0x07, 0x01, 0x03, + /// 0x66, 0x61, 0x63, 0x00, 0x00, 0x0a, 0x17, 0x01, 0x15, 0x00, 0x20, 0x00, + /// 0x50, 0x04, 0x7e, 0x42, 0x01, 0x05, 0x20, 0x00, 0x20, 0x00, 0x42, 0x01, + /// 0x7d, 0x10, 0x00, 0x7e, 0x0b, 0x0b + /// ]) + /// + /// while let payload = try parser.parseNext() { + /// switch payload { + /// case .header(let version): + /// print("Wasm version: \(version)") + /// default: break + /// } + /// } + /// ``` + @inlinable + public mutating func parseNext() throws(WasmParserError) -> ParsingPayload? { + switch nextParseTarget { + case .header: + try parseMagicNumber() + let version = try parseVersion() + self.nextParseTarget = .section + return .header(version: version) + case .section: + guard try !stream.hasReachedEnd() else { + return nil + } + let sectionID = try stream.consumeAny() + let sectionSize: UInt32 = try parseUnsigned() + let sectionStart = stream.currentIndex + + let payload: ParsingPayload + let order: OrderTracking.Order? + switch sectionID { + case 0: + order = nil + payload = .customSection(try parseCustomSection(size: sectionSize)) + case 1: + order = .type + payload = .typeSection(try parseTypeSection()) + case 2: + order = ._import + payload = .importSection(try parseImportSection()) + case 3: + order = .function + payload = .functionSection(try parseFunctionSection()) + case 4: + order = .table + payload = .tableSection(try parseTableSection()) + case 5: + order = .memory + payload = .memorySection(try parseMemorySection()) + case 6: + order = .global + payload = .globalSection(try parseGlobalSection()) + case 7: + order = .export + payload = .exportSection(try parseExportSection()) + case 8: + order = .start + payload = .startSection(try parseStartSection()) + case 9: + order = .element + payload = .elementSection(try parseElementSection()) + case 10: + order = .code + payload = .codeSection(try parseCodeSection()) + case 11: + order = .data + payload = .dataSection(try parseDataSection()) + case 12: + order = .dataCount + payload = .dataCount(try parseDataCountSection()) + default: + throw makeError(.malformedSectionID(sectionID)) + } + if let order = order { + try orderTracking.track(order: order, parser: self) + } + let expectedSectionEnd = sectionStart + Int(sectionSize) + guard expectedSectionEnd == stream.currentIndex else { + throw makeError( + .sectionSizeMismatch( + sectionID: sectionID, + expected: expectedSectionEnd, + actual: offset + ) + ) + } + return payload + } + } +} + +/// A map of names by its index. +public typealias NameMap = [UInt32: String] + +/// Parsed names from a name section subsection. +public enum ParsedNames { + /// Subsection 0: Module name. + case moduleName(String) + /// Subsection 1: Function names. + case functions(NameMap) + /// Subsection 2: Local names (funcIndex → [localIndex → name]). + case locals([UInt32: NameMap]) + /// Subsection 3: Label names (funcIndex → [labelIndex → name]). + case labels([UInt32: NameMap]) + /// Subsection 4: Type names. + case types(NameMap) + /// Subsection 5: Table names. + case tables(NameMap) + /// Subsection 6: Memory names. + case memories(NameMap) + /// Subsection 7: Global names. + case globals(NameMap) + /// Subsection 8: Element segment names. + case elements(NameMap) + /// Subsection 9: Data segment names. + case dataSegments(NameMap) +} + +/// A parser for the name custom section. +/// +/// > Note: +public struct NameSectionParser { + let stream: Stream + + public init(stream: Stream) { + self.stream = stream + } + + /// Parses the entire name section. + /// + /// - Throws: If the stream is malformed or the section is invalid. + /// - Returns: A list of parsed names. + public func parseAll() throws(WasmParserError) -> [ParsedNames] { + var results: [ParsedNames] = [] + while try !stream.hasReachedEnd() { + let id = try stream.consumeAny() + guard let result = try parseNameSubsection(type: id) else { + continue + } + results.append(result) + } + return results + } + + func parseNameSubsection(type: UInt8) throws(WasmParserError) -> ParsedNames? { + let size = try stream.parseUnsigned(UInt32.self) + switch type { + case 0: return .moduleName(try stream.parseName()) + case 1: return .functions(try parseNameMap()) + case 2: return .locals(try parseIndirectNameMap()) + case 3: return .labels(try parseIndirectNameMap()) + case 4: return .types(try parseNameMap()) + case 5: return .tables(try parseNameMap()) + case 6: return .memories(try parseNameMap()) + case 7: return .globals(try parseNameMap()) + case 8: return .elements(try parseNameMap()) + case 9: return .dataSegments(try parseNameMap()) + default: + _ = try stream.consume(count: Int(size)) + return nil + } + } + + func parseNameMap() throws(WasmParserError) -> NameMap { + var nameMap: NameMap = [:] + _ = try stream.parseVector { () throws(WasmParserError) in + let index = try stream.parseUnsigned(UInt32.self) + let name = try stream.parseName() + nameMap[index] = name + } + return nameMap + } + + func parseIndirectNameMap() throws(WasmParserError) -> [UInt32: NameMap] { + var map: [UInt32: NameMap] = [:] + _ = try stream.parseVector { () throws(WasmParserError) in + let outerIndex = try stream.parseUnsigned(UInt32.self) + map[outerIndex] = try parseNameMap() + } + return map + } +} + +// MARK: - File Type Detection + +/// The type of a WebAssembly binary file. +public enum WasmFileType: Equatable, Sendable { + /// A core WebAssembly module (version 1) + case coreModule + /// A WebAssembly component (version 0x0d, layer 1) + case component + /// Unknown or invalid WebAssembly file + case unknown +} diff --git a/Sources/WasmParser/WasmParserError.swift b/Sources/WasmParserCore/WasmParserError.swift similarity index 69% rename from Sources/WasmParser/WasmParserError.swift rename to Sources/WasmParserCore/WasmParserError.swift index 4ef32f4d..9a3e4119 100644 --- a/Sources/WasmParser/WasmParserError.swift +++ b/Sources/WasmParserCore/WasmParserError.swift @@ -89,104 +89,104 @@ extension WasmParserError: CustomStringConvertible { extension WasmParserError.Message { @usableFromInline - static func invalidMagicNumber(_ bytes: [UInt8]) -> Self { + package static func invalidMagicNumber(_ bytes: [UInt8]) -> Self { Self("magic header not detected: expected \(WASM_MAGIC) but got \(bytes)") } @usableFromInline - static func unknownVersion(_ bytes: [UInt8]) -> Self { + package static func unknownVersion(_ bytes: [UInt8]) -> Self { Self("unknown binary version: \(bytes)") } - static func invalidUTF8(_ bytes: [UInt8]) -> Self { + package static func invalidUTF8(_ bytes: [UInt8]) -> Self { Self("malformed UTF-8 encoding: \(bytes)") } @usableFromInline - static func invalidSectionSize(_ size: UInt32) -> Self { + package static func invalidSectionSize(_ size: UInt32) -> Self { // TODO: Remove size parameter Self("unexpected end-of-file") } @usableFromInline - static func malformedSectionID(_ id: UInt8) -> Self { + package static func malformedSectionID(_ id: UInt8) -> Self { Self("malformed section id: \(id)") } @usableFromInline - static func malformedValueType(_ byte: UInt8) -> Self { + package static func malformedValueType(_ byte: UInt8) -> Self { Self("malformed value type: \(byte)") } - @usableFromInline static func zeroExpected(actual: UInt8) -> Self { + @usableFromInline package static func zeroExpected(actual: UInt8) -> Self { Self("Zero expected but got \(actual)") } @usableFromInline - static func tooManyLocals(_ count: UInt64, limit: UInt64) -> Self { + package static func tooManyLocals(_ count: UInt64, limit: UInt64) -> Self { Self("Too many locals: \(count) vs \(limit)") } - @usableFromInline static func expectedRefType(actual: ValueType) -> Self { + @usableFromInline package static func expectedRefType(actual: ValueType) -> Self { Self("Expected reference type but got \(actual)") } @usableFromInline - static func unexpectedElementKind(expected: UInt32, actual: UInt32) -> Self { + package static func unexpectedElementKind(expected: UInt32, actual: UInt32) -> Self { Self("Unexpected element kind: expected \(expected) but got \(actual)") } @usableFromInline - static let integerRepresentationTooLong = Self("Integer representation is too long") + package static let integerRepresentationTooLong = Self("Integer representation is too long") @usableFromInline - static let endOpcodeExpected = Self("`end` opcode expected but not found") + package static let endOpcodeExpected = Self("`end` opcode expected but not found") @usableFromInline - static let unexpectedEnd = Self("Unexpected end of the stream") + package static let unexpectedEnd = Self("Unexpected end of the stream") @usableFromInline - static func sectionSizeMismatch(sectionID: UInt8, expected: Int, actual: Int) -> Self { + package static func sectionSizeMismatch(sectionID: UInt8, expected: Int, actual: Int) -> Self { Self("Section size mismatch for section \(sectionID): expected \(expected) but got \(actual)") } - @usableFromInline static func unknownCanonOptionTag(_ tag: UInt8) -> Self { + @usableFromInline package static func unknownCanonOptionTag(_ tag: UInt8) -> Self { Self("Unknown canonical option tag: \(tag)") } - @usableFromInline static func illegalOpcode(_ opcode: [UInt8]) -> Self { + @usableFromInline package static func illegalOpcode(_ opcode: [UInt8]) -> Self { Self("Illegal opcode: \(opcode)") } @usableFromInline - static func malformedMutability(_ byte: UInt8) -> Self { + package static func malformedMutability(_ byte: UInt8) -> Self { Self("Malformed mutability: \(byte)") } @usableFromInline - static func malformedFunctionType(_ byte: UInt8) -> Self { + package static func malformedFunctionType(_ byte: UInt8) -> Self { Self("Malformed function type: \(byte)") } @usableFromInline - static let sectionOutOfOrder = Self("Sections in the module are out of order") + package static let sectionOutOfOrder = Self("Sections in the module are out of order") @usableFromInline - static func malformedLimit(_ byte: UInt8) -> Self { + package static func malformedLimit(_ byte: UInt8) -> Self { Self("Malformed limit: \(byte)") } - @usableFromInline static let malformedIndirectCall = Self("Malformed indirect call") + @usableFromInline package static let malformedIndirectCall = Self("Malformed indirect call") - @usableFromInline static func malformedDataSegmentKind(_ kind: UInt32) -> Self { + @usableFromInline package static func malformedDataSegmentKind(_ kind: UInt32) -> Self { Self("Malformed data segment kind: \(kind)") } - @usableFromInline static func invalidResultArity(expected: Int, actual: Int) -> Self { + @usableFromInline package static func invalidResultArity(expected: Int, actual: Int) -> Self { Self("invalid result arity: expected \(expected) but got \(actual)") } - @usableFromInline static func invalidFunctionType(_ index: Int64) -> Self { + @usableFromInline package static func invalidFunctionType(_ index: Int64) -> Self { Self("invalid function type index: \(index), expected a unsigned 32-bit integer") } } diff --git a/Sources/WasmParser/WasmTypes.swift b/Sources/WasmParserCore/WasmTypes.swift similarity index 100% rename from Sources/WasmParser/WasmTypes.swift rename to Sources/WasmParserCore/WasmTypes.swift diff --git a/Tests/WATTests/ComponentTests.swift b/Tests/WATTests/ComponentTests.swift index 9b376d5f..f56dc106 100644 --- a/Tests/WATTests/ComponentTests.swift +++ b/Tests/WATTests/ComponentTests.swift @@ -6,7 +6,7 @@ import WAT import WasmKit import WasmKitWASI - import WasmParser + import WasmParserCore import WasmTools @Suite diff --git a/Tests/WATTests/EncoderTests.swift b/Tests/WATTests/EncoderTests.swift index 8a80e578..0b808b5e 100644 --- a/Tests/WATTests/EncoderTests.swift +++ b/Tests/WATTests/EncoderTests.swift @@ -1,6 +1,7 @@ import Foundation import Testing -import WasmParser +import WasmParserCore +import WasmTypes @testable import WAT @@ -381,7 +382,7 @@ struct EncoderTests { options: EncodeOptions(nameSection: true) ) - var parser = WasmParser.Parser(bytes: bytes) + var parser = WasmParserCore.Parser(bytes: bytes) var customSections: [CustomSection] = [] while let payload = try parser.parseNext() { guard case .customSection(let section) = payload else { @@ -426,7 +427,7 @@ struct EncoderTests { ) // Extract the name custom section bytes - var parser = WasmParser.Parser(bytes: bytes) + var parser = WasmParserCore.Parser(bytes: bytes) var nameBytes: ArraySlice? while let payload = try parser.parseNext() { if case .customSection(let section) = payload, section.name == "name" { diff --git a/Tests/WATTests/LexerTests.swift b/Tests/WATTests/LexerTests.swift index 0ba3afef..74991bbe 100644 --- a/Tests/WATTests/LexerTests.swift +++ b/Tests/WATTests/LexerTests.swift @@ -1,6 +1,6 @@ import Foundation import Testing -import WasmParser +import WasmParserCore @testable import WAT diff --git a/Tests/WATTests/ParseOnlyTests.swift b/Tests/WATTests/ParseOnlyTests.swift index 0894421b..9a9f21a9 100644 --- a/Tests/WATTests/ParseOnlyTests.swift +++ b/Tests/WATTests/ParseOnlyTests.swift @@ -1,7 +1,7 @@ import Foundation import Testing import WAT -import WasmParser +import WasmParserCore @Suite struct ParseOnlyTests { @@ -48,7 +48,7 @@ struct ParseOnlyTests { } private func parseWasmBytes(_ bytes: [UInt8], features: WasmFeatureSet) throws { - var parser = WasmParser.Parser(bytes: bytes, features: features) + var parser = WasmParserCore.Parser(bytes: bytes, features: features) while (try parser.parseNext()) != nil {} } diff --git a/Tests/WATTests/ParserTests.swift b/Tests/WATTests/ParserTests.swift index f505692b..10d33b12 100644 --- a/Tests/WATTests/ParserTests.swift +++ b/Tests/WATTests/ParserTests.swift @@ -1,6 +1,6 @@ import Foundation import Testing -import WasmParser +import WasmParserCore @testable import WAT diff --git a/Tests/WATTests/Spectest.swift b/Tests/WATTests/Spectest.swift index a3de5369..43b2ebab 100644 --- a/Tests/WATTests/Spectest.swift +++ b/Tests/WATTests/Spectest.swift @@ -1,5 +1,5 @@ import Foundation -import WasmParser +import WasmParserCore enum Spectest { static let rootDirectory = URL(fileURLWithPath: #filePath) diff --git a/Tests/WasmKitTests/Spectest/TestCase.swift b/Tests/WasmKitTests/Spectest/TestCase.swift index d27a3463..8d5ff220 100644 --- a/Tests/WasmKitTests/Spectest/TestCase.swift +++ b/Tests/WasmKitTests/Spectest/TestCase.swift @@ -157,7 +157,7 @@ extension TestCase { handler(self, location, .failed("\(error)")) } } - } catch let parseError as WatParserError { + } catch let parseError { if let location = parseError.location { handler(self, location, .failed(parseError.message)) } else { diff --git a/Tests/WasmParserTests/LEBTests.swift b/Tests/WasmParserTests/LEBTests.swift index d275a117..2f5feccc 100644 --- a/Tests/WasmParserTests/LEBTests.swift +++ b/Tests/WasmParserTests/LEBTests.swift @@ -1,6 +1,7 @@ import Testing @testable import WasmParser +@testable import WasmParserCore @Suite struct LEBTest { @Test func unsigned() throws { diff --git a/Utilities/Sources/WasmGen.swift b/Utilities/Sources/WasmGen.swift index a2608b1e..1d1e3a7b 100644 --- a/Utilities/Sources/WasmGen.swift +++ b/Utilities/Sources/WasmGen.swift @@ -323,7 +323,7 @@ enum WasmGen { static func generateTextInstructionParser(_ instructions: InstructionSet) -> String { var code = """ - import WasmParser + import WasmParserCore import WasmTypes /// Parses a text instruction, consuming immediate tokens as necessary. @@ -412,7 +412,7 @@ enum WasmGen { static func generateBinaryInstructionEncoder(_ instructions: InstructionSet) -> String { var code = """ - import WasmParser + import WasmParserCore import WasmTypes /// An instruction encoder that is responsible for encoding opcodes and immediates @@ -751,7 +751,7 @@ enum WasmGen { let generatedFiles = [ GeneratedFile( - projectSources + ["WasmParser", "InstructionVisitor.swift"], + projectSources + ["WasmParserCore", "InstructionVisitor.swift"], header + generateInstructionEnum(instructions) + "\n\n" + generateAnyInstructionVisitor(instructions) @@ -760,7 +760,7 @@ enum WasmGen { + "\n" ), GeneratedFile( - projectSources + ["WasmParser", "BinaryInstructionDecoder.swift"], + projectSources + ["WasmParserCore", "BinaryInstructionDecoder.swift"], header + generateBinaryInstructionDecoder(instructions) ), GeneratedFile(