extern crate core_affinity; use std::alloc::Allocator; use std::iter::Sum; use std::ops::Range; use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData}; use self::core_affinity::CoreId; #[derive(Debug, Copy, Clone)] struct UnsafeData(*mut T, usize); impl UnsafeData { fn empty() -> UnsafeData { UnsafeData(([] as [T; 0]).as_mut_ptr(), 0) } fn new(xs: &mut Vec) -> UnsafeData { UnsafeData(xs.as_mut_ptr(), xs.len()) } fn get_slice(&self) -> &mut [T] { unsafe { std::slice::from_raw_parts_mut(self.0, self.1) } } } unsafe impl Send for UnsafeData {} unsafe impl Sync for UnsafeData {} #[derive(Debug, Copy, Clone)] struct UnsafeRefs { a: UnsafeData, b: UnsafeData, c: UnsafeData, } unsafe impl Send for UnsafeRefs {} unsafe impl Sync for UnsafeRefs {} pub struct UnsafeDevice { pub(crate) ncore: usize, pub(crate) pin: bool, pub(crate) core_ids: Vec, data: UnsafeRefs, } impl UnsafeDevice { pub fn new(ncore: usize, pin: bool) -> Self { let mut core_ids = match core_affinity::get_core_ids() { Some(xs) => xs, None => { colour::e_red_ln!("Cannot enumerate cores, pinning will not work if enabled"); (0..ncore).map(|i| CoreId { id: i }).collect() } }; core_ids.resize(ncore, core_ids[0]); UnsafeDevice { ncore, pin, core_ids, data: UnsafeRefs { a: UnsafeData::empty(), b: UnsafeData::empty(), c: UnsafeData::empty() }, } } fn thread_ranges(&self, len: usize) -> Vec<(usize, Range)> { let chunk = (len as f64 / self.ncore as f64).ceil() as usize; (0..self.ncore) .map(|t| { (t, if t == self.ncore - 1 { (t * chunk)..len } else { (t * chunk)..((t + 1) * chunk) }) }) .collect::>() } } // Unsafe threaded version, it should be semantically equal to the single threaded version impl RustStream for StreamData, A> { fn init_arrays(&mut self) { self.device.data.a = UnsafeData::new(&mut self.a); self.device.data.b = UnsafeData::new(&mut self.b); self.device.data.c = UnsafeData::new(&mut self.c); let init = self.init; let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let b = data.b.get_slice(); let c = data.c.get_slice(); for i in r { a[i] = init.0; b[i] = init.1; c[i] = init.2; } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn copy(&mut self) { let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let c = data.c.get_slice(); for i in r { c[i] = a[i]; } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn mul(&mut self) { let scalar = self.scalar; let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let b = data.b.get_slice(); let c = data.c.get_slice(); for i in r { b[i] = scalar * c[i]; } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn add(&mut self) { let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let b = data.b.get_slice(); let c = data.c.get_slice(); for i in r { c[i] = a[i] + b[i]; } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn triad(&mut self) { let scalar = self.scalar; let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let b = data.b.get_slice(); let c = data.c.get_slice(); for i in r { a[i] = b[i] + scalar * c[i] } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn nstream(&mut self) { let scalar = self.scalar; let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let b = data.b.get_slice(); let c = data.c.get_slice(); for i in r { a[i] += b[i] + scalar * c[i] } }) }) .collect::>() .into_iter() .for_each(|t| t.join().unwrap()); } fn dot(&mut self) -> T { let pin = self.device.pin; let data = self.device.data; self .device .thread_ranges(self.size) .into_iter() .map(|(t, r)| { let core = self.device.core_ids[t]; std::thread::spawn(move || { if pin { core_affinity::set_for_current(core); } let a = data.a.get_slice(); let b = data.b.get_slice(); let mut p = T::default(); for i in r { p += a[i] * b[i]; } p }) }) .collect::>() .into_iter() .map(|t| t.join().unwrap()) .sum() } }