use rayon::prelude::*;
use std::simd::f32x8;

/// Sums `values` with a plain `for` loop and a single running accumulator.
///
/// Elements are added one at a time in index order, starting from `0.0`,
/// so an empty vector yields `0.0`.
pub fn vec_sum_for_loop(values: &Vec<f32>) -> f32 {
    let mut acc = 0.0_f32;
    for &item in values.iter() {
        acc += item;
    }
    acc
}

/// Sums `values` using two independent accumulators to break the data stall.
///
/// Even-indexed elements are added to one accumulator and odd-indexed
/// elements to the other, so consecutive additions do not depend on each
/// other's result. The two partial sums are added together at the end.
/// An empty vector yields `0.0`.
///
/// # Panics
///
/// Panics if `values.len()` is odd.
pub fn vec_sum_two_accumulators(values: &Vec<f32>) -> f32 {
    assert!(values.len() % 2 == 0);
    let mut total1: f32 = 0.0;
    let mut total2: f32 = 0.0;
    // `chunks_exact(2)` simply yields nothing for an empty vector; an index
    // range ending at `len() - 1` would underflow when `len()` is zero.
    for pair in values.chunks_exact(2) {
        total1 += pair[0];
        total2 += pair[1];
    }
    total1 + total2
}

/// Sums `values` by folding over an iterator.
///
/// The fold starts at `0.0` and adds each element to the running total in
/// iteration order; an empty vector yields `0.0`.
pub fn vec_sum_fold(values: &Vec<f32>) -> f32 {
    values
        .iter()
        .copied()
        .fold(0.0_f32, |running, item| running + item)
}

/// Sums `values` the easy way, with the standard library's `Iterator::sum`.
pub fn vec_sum_method(values: &Vec<f32>) -> f32 {
    values.iter().copied().sum::<f32>()
}

/// Sums `values` with `f32::algebraic_add` instead of the `+` operator.
///
/// This tells the compiler we're okay with the additions being treated as
/// associative, so the rounding of the result may differ from a strictly
/// left-to-right sum. The fold starts at `0.0`.
pub fn vec_sum_algebraic(values: &Vec<f32>) -> f32 {
    let combine = |running: f32, item: f32| running.algebraic_add(item);
    values.iter().copied().fold(0.0_f32, combine)
}

// In parallel with Rayon
pub fn vec_sum_parallel(values: &Vec<f32>) -> f32 {
    values
        .par_iter()
        .copied()
        .reduce(|| 0.0, |a, b| a.algebraic_add(b))
}

/// Sums `values` eight elements at a time using `std::simd`.
///
/// The vector is split into full blocks of eight `f32`s plus a shorter
/// tail. Each full block is loaded into an `f32x8` and the blocks are added
/// lane-wise; the eight lane totals are then added together, followed by
/// the sum of the tail elements. Because each lane accumulates every eighth
/// element, the rounding of the result may differ from a strictly
/// left-to-right sum.
///
/// Inputs shorter than eight elements (including the empty vector) are
/// handled entirely by the tail sum.
pub fn vec_sum_simd(values: &Vec<f32>) -> f32 {
    // Must match the lane count of `f32x8`.
    const LANES: usize = 8;

    // Slice-based chunking exposes the leftover elements directly, so the
    // data is walked once and no `unwrap()` on a remainder is needed.
    let blocks = values.chunks_exact(LANES);
    let tail: f32 = blocks.remainder().iter().sum();

    // Every chunk is exactly `LANES` long, so `from_slice` cannot panic here.
    let lanes: f32x8 = blocks.map(f32x8::from_slice).sum();

    lanes.as_array().iter().sum::<f32>() + tail
}
