diff --git a/hal/src/dmac/async_api.rs b/hal/src/dmac/async_api.rs index f1d24767c419..b00727064bac 100644 --- a/hal/src/dmac/async_api.rs +++ b/hal/src/dmac/async_api.rs @@ -1,11 +1,10 @@ //! APIs for async DMAC operations. use atsamd_hal_macros::hal_cfg; -use core::sync::atomic; use crate::{ async_hal::interrupts::{DMAC, Handler}, - dmac::{TriggerSource, waker::WAKERS}, + dmac::{TriggerSource, asm_fence, waker::WAKERS}, util::BitIter, }; @@ -55,11 +54,7 @@ impl Handler for InterruptHandler { core::hint::spin_loop(); } - // Prevent the compiler from re-ordering read/write - // operations beyond this fence. - // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) - atomic::fence(atomic::Ordering::Acquire); // ▼ - + asm_fence(); // ▲ WAKERS[pend_channel as usize].wake(); } } @@ -112,11 +107,7 @@ impl Handler for InterruptHandler { core::hint::spin_loop(); } - // Prevent the compiler from re-ordering read/write - // operations beyond this fence. - // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) - atomic::fence(atomic::Ordering::Acquire); // ▼ - + asm_fence(); // ▲ WAKERS[channel].wake(); } } diff --git a/hal/src/dmac/channel/mod.rs b/hal/src/dmac/channel/mod.rs index 26bcaf822b86..cada8a02190d 100644 --- a/hal/src/dmac/channel/mod.rs +++ b/hal/src/dmac/channel/mod.rs @@ -33,8 +33,7 @@ #![allow(unused_braces)] -use core::marker::PhantomData; -use core::sync::atomic; +use core::{marker::PhantomData, ptr::NonNull}; use atsamd_hal_macros::{hal_cfg, hal_macro_helper}; @@ -44,7 +43,10 @@ use super::{ sram::{self, DmacDescriptor}, transfer::{BufferPair, Transfer}, }; -use crate::typelevel::{Is, Sealed}; +use crate::{ + dmac::asm_fence, + typelevel::{Is, Sealed}, +}; use modular_bitfield::prelude::*; mod reg; @@ -271,10 +273,7 @@ impl Channel { /// Enable the transfer, and emit a compiler fence. #[inline] fn _enable_private(&mut self) { - // Prevent the compiler from re-ordering read/write - // operations beyond this fence. - // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) - atomic::fence(atomic::Ordering::Release); // ▲ + asm_fence(); // ▲ self.regs.chctrla.modify(|_, w| w.enable().set_bit()); } @@ -288,10 +287,7 @@ impl Channel { core::hint::spin_loop(); } - // Prevent the compiler from re-ordering read/write - // operations beyond this fence. - // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) - atomic::fence(atomic::Ordering::Acquire); // ▼ + asm_fence(); // ▼ } /// Returns whether or not the transfer is complete. @@ -346,9 +342,9 @@ impl Channel { // SAFETY This is safe as we are only reading the descriptor's address, // and not actually writing any data to it. We also assume the descriptor // will never be moved. - descriptor as *mut _ + NonNull::new(descriptor) } else { - core::ptr::null_mut() + None }; unsafe { @@ -846,7 +842,7 @@ pub(crate) unsafe fn write_descriptor descriptor: &mut DmacDescriptor, source: &mut Src, destination: &mut Dst, - next: *mut DmacDescriptor, + next: Option>, ) { let src_ptr = source.dma_ptr(); let src_inc = source.incrementing(); @@ -868,17 +864,26 @@ pub(crate) unsafe fn write_descriptor .with_beatsize(Src::Beat::BEATSIZE) .with_valid(true); - *descriptor = DmacDescriptor { - // Next descriptor address: 0x0 terminates the transaction (no linked list), - // any other address points to the next block descriptor - descaddr: next, - // Source address: address of the last beat transfer source in block - srcaddr: src_ptr as *mut _, - // Destination address: address of the last beat transfer destination in block - dstaddr: dst_ptr as *mut _, - // Block transfer count: number of beats in block transfer - btcnt: length as u16, - // Block transfer control: Datasheet section 19.8.2.1 p.329 - btctrl, - }; + // Seems like we need a fence before the buffer pointer "escapes" the current + // function. I don't claim to fully understand why, or what the gnarly LLVM + // optimization details might be. But seems like the buffer pointers must be + // written somewhere with a _volatile_ access, _after_ an (asm) compiler + // fence: https://users.rust-lang.org/t/compiler-fence-dma/132027/40 + asm_fence(); + + unsafe { + core::ptr::from_mut(descriptor).write_volatile(DmacDescriptor { + // Next descriptor address: 0x0 terminates the transaction (no linked list), + // any other address points to the next block descriptor + descaddr: next.map(|n| n.as_ptr()).unwrap_or(core::ptr::null_mut()), + // Source address: address of the last beat transfer source in block + srcaddr: src_ptr as *mut _, + // Destination address: address of the last beat transfer destination in block + dstaddr: dst_ptr as *mut _, + // Block transfer count: number of beats in block transfer + btcnt: length as u16, + // Block transfer control: Datasheet section 19.8.2.1 p.329 + btctrl, + }); + } } diff --git a/hal/src/dmac/channel/reg.rs b/hal/src/dmac/channel/reg.rs index 3dac223a8976..4f40dc175180 100644 --- a/hal/src/dmac/channel/reg.rs +++ b/hal/src/dmac/channel/reg.rs @@ -18,10 +18,9 @@ use paste::paste; use crate::pac::{ self, Dmac, Peripherals, - dmac::{Busych, Intstatus, Pendch, Swtrigctrl}, dmac::{ - busych::BusychSpec, intstatus::IntstatusSpec, pendch::PendchSpec, - swtrigctrl::SwtrigctrlSpec, + Busych, Intstatus, Pendch, Swtrigctrl, busych::BusychSpec, intstatus::IntstatusSpec, + pendch::PendchSpec, swtrigctrl::SwtrigctrlSpec, }, }; @@ -324,9 +323,6 @@ impl Drop for RegisterBlock { core::hint::spin_loop(); } - // Prevent the compiler from re-ordering read/write - // operations beyond this fence. - // (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) - core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire); // ▼ + crate::dmac::channel::asm_fence(); // ▼ } } diff --git a/hal/src/dmac/mod.rs b/hal/src/dmac/mod.rs index c1d4980a9daf..6f41865279fc 100644 --- a/hal/src/dmac/mod.rs +++ b/hal/src/dmac/mod.rs @@ -529,3 +529,19 @@ mod waker { pub(super) static WAKERS: [AtomicWaker; with_num_channels!(get)] = [NEW_WAKER; with_num_channels!(get)]; } + +/// Prevent the compiler from re-ordering read/write +/// operations beyond this function. +/// (see https://docs.rust-embedded.org/embedonomicon/dma.html#compiler-misoptimizations) +#[inline(always)] +fn asm_fence() { + // TODO: Seems like compiler fences aren't enough to guarantee memory accesses + // won't be reordered. (see https://users.rust-lang.org/t/compiler-fence-dma/132027) + // core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire); // ▼ + + // Apparently, the only truly foolproof way to prevent reordering is with inline + // asm + unsafe { + core::arch::asm!("dmb"); + } +} diff --git a/hal/src/sercom/i2c/async_api.rs b/hal/src/sercom/i2c/async_api.rs index 7b07651bcd3f..5e7879191121 100644 --- a/hal/src/sercom/i2c/async_api.rs +++ b/hal/src/sercom/i2c/async_api.rs @@ -434,6 +434,7 @@ mod dma { use crate::dmac::{channel, sram::DmacDescriptor}; use crate::sercom::dma::SharedSliceBuffer; use Operation::{Read, Write}; + use core::ptr::{self, NonNull}; const NUM_LINKED_TRANSFERS: usize = 16; @@ -485,7 +486,7 @@ mod dma { .unwrap_or_else(|_| panic!("BUG: DMAC descriptors overflow")); let last_descriptor = descriptors.last_mut().unwrap(); let next_ptr = - (last_descriptor as *mut DmacDescriptor).wrapping_add(1); + NonNull::new((ptr::from_mut(last_descriptor)).wrapping_add(1)); unsafe { channel::write_descriptor( @@ -509,7 +510,7 @@ mod dma { .unwrap_or_else(|_| panic!("BUG: DMAC descriptors overflow")); let last_descriptor = descriptors.last_mut().unwrap(); let next_ptr = - (last_descriptor as *mut DmacDescriptor).wrapping_add(1); + NonNull::new((ptr::from_mut(last_descriptor)).wrapping_add(1)); let mut bytes = SharedSliceBuffer::from_slice(bytes); unsafe { diff --git a/hal/src/sercom/i2c/impl_ehal.rs b/hal/src/sercom/i2c/impl_ehal.rs index be677df50c23..0ad0efebf397 100644 --- a/hal/src/sercom/i2c/impl_ehal.rs +++ b/hal/src/sercom/i2c/impl_ehal.rs @@ -326,6 +326,7 @@ mod dma { address: u8, operations: &mut [i2c::Operation<'_>], ) -> Result<(), Self::Error> { + use core::ptr::{self, NonNull}; use i2c::Operation::{Read, Write}; const NUM_LINKED_TRANSFERS: usize = 16; @@ -375,7 +376,7 @@ mod dma { .unwrap_or_else(|_| panic!("BUG: DMAC descriptors overflow")); let last_descriptor = descriptors.last_mut().unwrap(); let next_ptr = - (last_descriptor as *mut DmacDescriptor).wrapping_add(1); + NonNull::new((ptr::from_mut(last_descriptor)).wrapping_add(1)); unsafe { channel::write_descriptor( @@ -399,7 +400,7 @@ mod dma { .unwrap_or_else(|_| panic!("BUG: DMAC descriptors overflow")); let last_descriptor = descriptors.last_mut().unwrap(); let next_ptr = - (last_descriptor as *mut DmacDescriptor).wrapping_add(1); + NonNull::new((ptr::from_mut(last_descriptor)).wrapping_add(1)); let mut bytes = SharedSliceBuffer::from_slice(bytes); unsafe { diff --git a/hal/src/sercom/spi/async_api/dma.rs b/hal/src/sercom/spi/async_api/dma.rs index 87141318ea72..3cf3e8a6263b 100644 --- a/hal/src/sercom/spi/async_api/dma.rs +++ b/hal/src/sercom/spi/async_api/dma.rs @@ -343,7 +343,7 @@ where &mut sercom_ptr, &mut sink, // Add a null descriptor pointer to end the transfer. - core::ptr::null_mut(), + None, ); } @@ -361,7 +361,7 @@ where &mut source, &mut sercom_ptr, // Add a null descriptor pointer to end the transfer. - core::ptr::null_mut(), + None, ); } diff --git a/hal/src/sercom/spi/impl_ehal/dma.rs b/hal/src/sercom/spi/impl_ehal/dma.rs index 3e0929d907f9..8c0be194c865 100644 --- a/hal/src/sercom/spi/impl_ehal/dma.rs +++ b/hal/src/sercom/spi/impl_ehal/dma.rs @@ -210,7 +210,7 @@ where &mut sercom_ptr, &mut sink, // Add a null descriptor pointer to end the transfer. - core::ptr::null_mut(), + None, ); } @@ -228,7 +228,7 @@ where &mut source, &mut sercom_ptr, // Add a null descriptor pointer to end the transfer. - core::ptr::null_mut(), + None, ); }