1use num_traits::Bounded;
2
3use crate::imageops::filter_1d::{SafeAdd, SafeMul};
4use crate::{ImageBuffer, Pixel, Primitive};
5
6#[must_use]
21pub fn fast_blur<P: Pixel>(
22 input_buffer: &ImageBuffer<P, Vec<P::Subpixel>>,
23 sigma: f32,
24) -> ImageBuffer<P, Vec<P::Subpixel>> {
25 let (width, height) = input_buffer.dimensions();
26
27 if width == 0 || height == 0 {
28 return input_buffer.clone();
29 }
30
31 let num_passes = 3;
32
33 let boxes = boxes_for_gauss(sigma, num_passes);
34 if boxes.is_empty() {
35 return input_buffer.clone();
36 }
37
38 let samples = input_buffer.as_flat_samples().samples;
39
40 let destination_size = match (width as usize)
41 .safe_mul(height as usize)
42 .and_then(|x| x.safe_mul(P::CHANNEL_COUNT as usize))
43 {
44 Ok(s) => s,
45 Err(_) => panic!("Width and height and channels count exceeded pointer size"),
46 };
47
48 let first_box = boxes[0];
49
50 let mut transient = vec![P::Subpixel::min_value(); destination_size];
51 let mut dst = vec![P::Subpixel::min_value(); destination_size];
52
53 let stride = width as usize * P::CHANNEL_COUNT as usize;
55
56 test_radius_size(width as usize, first_box);
58 test_radius_size(height as usize, first_box);
59
60 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
61 samples,
62 stride,
63 &mut transient,
64 stride,
65 width,
66 first_box,
67 );
68
69 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
70 &transient, stride, &mut dst, stride, width, height, first_box,
71 );
72
73 for &box_container in boxes.iter().skip(1) {
74 test_radius_size(width as usize, box_container);
76 test_radius_size(height as usize, box_container);
77
78 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
79 &dst,
80 stride,
81 &mut transient,
82 stride,
83 width,
84 box_container,
85 );
86
87 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
88 &transient,
89 stride,
90 &mut dst,
91 stride,
92 width,
93 height,
94 box_container,
95 );
96 }
97
98 let mut buffer = ImageBuffer::from_raw(width, height, dst).unwrap();
99 buffer.copy_color_space_from(input_buffer);
100 buffer
101}
102
103#[inline]
104fn test_radius_size(bound: usize, radius: usize) {
105 match bound.safe_add(radius) {
106 Ok(_) => {}
107 Err(_) => panic!("Radius overflowed maximum possible size"),
108 }
109}
110
/// Computes the radii of `n` successive box blurs that together approximate a
/// Gaussian blur of standard deviation `sigma`.
///
/// Uses the standard "boxes for Gauss" derivation: pick the ideal averaging
/// width, split the passes between two adjacent odd widths, then convert each
/// width to an odd radius.
fn boxes_for_gauss(sigma: f32, n: usize) -> Vec<usize> {
    // Ideal averaging-filter width for `n` passes.
    let w_ideal = f32::sqrt((12.0 * sigma.powi(2) / (n as f32)) + 1.0);

    // Round down to the nearest odd width; `upper` is the next odd width.
    let mut lower = w_ideal.floor();
    if lower % 2.0 == 0.0 {
        lower -= 1.0;
    }
    let upper = lower + 2.0;

    // Number of leading passes that should use the narrower box.
    let threshold =
        f32::round(0.25 * (n as f32) * (lower + 3.0) - 3.0 * sigma.powi(2) * (lower + 1.0).recip())
            as usize;

    (0..n)
        .map(|pass| {
            let box_width = if pass < threshold { lower } else { upper } as usize;
            // Width -> radius, then force the radius odd: `| 1` bumps even
            // values up by one and leaves odd values untouched.
            (box_width.saturating_sub(1) / 2) | 1
        })
        .collect()
}
128
/// Rounds `x` up to the next odd number; odd inputs pass through unchanged.
#[inline]
fn ceil_to_odd(x: usize) -> usize {
    // Setting the lowest bit maps every even value to the next odd value and
    // is the identity on odd values.
    x | 1
}
137
138#[inline]
139#[allow(clippy::manual_clamp)]
140fn rounding_saturating_mul<T: Primitive>(v: f32, w: f32) -> T {
141 if T::DEFAULT_MAX_VALUE.to_f32().unwrap() != 1.0 {
144 T::from(
145 (v * w)
146 .round()
147 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
148 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
149 )
150 .unwrap()
151 } else {
152 T::from(
153 (v * w)
154 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
155 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
156 )
157 .unwrap()
158 }
159}
160
161fn box_blur_horizontal_pass_strategy<T, P: Primitive>(
162 src: &[P],
163 src_stride: usize,
164 dst: &mut [P],
165 dst_stride: usize,
166 width: u32,
167 radius: usize,
168) where
169 T: Pixel,
170{
171 if T::CHANNEL_COUNT == 1 {
172 box_blur_horizontal_pass_impl::<P, 1>(src, src_stride, dst, dst_stride, width, radius);
173 } else if T::CHANNEL_COUNT == 2 {
174 box_blur_horizontal_pass_impl::<P, 2>(src, src_stride, dst, dst_stride, width, radius);
175 } else if T::CHANNEL_COUNT == 3 {
176 box_blur_horizontal_pass_impl::<P, 3>(src, src_stride, dst, dst_stride, width, radius);
177 } else if T::CHANNEL_COUNT == 4 {
178 box_blur_horizontal_pass_impl::<P, 4>(src, src_stride, dst, dst_stride, width, radius);
179 } else {
180 unimplemented!("More than 4 channels is not yet implemented");
181 }
182}
183
184fn box_blur_vertical_pass_strategy<T: Pixel, P: Primitive>(
185 src: &[P],
186 src_stride: usize,
187 dst: &mut [P],
188 dst_stride: usize,
189 width: u32,
190 height: u32,
191 radius: usize,
192) {
193 box_blur_vertical_pass_impl::<P>(
194 src,
195 src_stride,
196 dst,
197 dst_stride,
198 width,
199 height,
200 radius,
201 T::CHANNEL_COUNT as usize,
202 );
203}
204
/// Horizontal box-blur pass over interleaved samples, specialized on the
/// channel count `CN`.
///
/// Each row maintains one running sum per channel (`weight0..weight3`) over a
/// sliding window of `2 * radius + 1` pixels. Window indices are clamped at
/// the row edges via `min(width_bound)` / `max(0)`, i.e. the first and last
/// pixel are repeated beyond the borders.
///
/// `src_stride` / `dst_stride` are row strides in samples; rows are visited
/// via `chunks_exact`, so any trailing partial row is skipped.
fn box_blur_horizontal_pass_impl<T, const CN: usize>(
    src: &[T],
    src_stride: usize,
    dst: &mut [T],
    dst_stride: usize,
    width: u32,
    radius: usize,
) where
    T: Primitive,
{
    assert!(width > 0, "Width must be sanitized before this method");
    test_radius_size(width as usize, radius);

    let kernel_size = radius * 2 + 1;
    // Weight given to the clamped left-edge pixel when seeding the window:
    // it stands in for itself plus every (virtual) pixel left of the row.
    let edge_count = ((kernel_size / 2) + 1) as f32;
    let half_kernel = kernel_size / 2;

    // Box-filter normalization factor (1 / window length).
    let weight = 1f32 / (radius * 2 + 1) as f32;

    let width_bound = width as usize - 1;

    // Each row is blurred independently.
    for (dst, src) in dst
        .chunks_exact_mut(dst_stride)
        .zip(src.chunks_exact(src_stride))
    {
        let mut weight1: f32 = 0.;
        let mut weight2: f32 = 0.;
        let mut weight3: f32 = 0.;

        // First pixel of the row, counted `edge_count` times (edge clamp).
        let chunk0 = &src[..CN];

        let mut weight0 = chunk0[0].to_f32().unwrap() * edge_count;
        if CN > 1 {
            weight1 = chunk0[1].to_f32().unwrap() * edge_count;
        }
        if CN > 2 {
            weight2 = chunk0[2].to_f32().unwrap() * edge_count;
        }
        if CN == 4 {
            weight3 = chunk0[3].to_f32().unwrap() * edge_count;
        }

        // Seed the rest of the initial window (clamped to the row end for
        // very narrow images).
        for x in 1..=half_kernel {
            let px = x.min(width_bound) * CN;
            let chunk0 = &src[px..px + CN];
            weight0 += chunk0[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += chunk0[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += chunk0[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += chunk0[3].to_f32().unwrap();
            }
        }

        // Left-edge region: both window ends may need clamping, so indices
        // are computed per pixel.
        for x in 0..half_kernel.min(width as usize) {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;

            // Emit the current average, then slide the window right by one.
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }

        // Interior fast path: for x in [half_kernel, max_x_before_clamping)
        // neither window end needs clamping, so the incoming and outgoing
        // pixels can be walked with plain chunk iterators (no per-pixel
        // index math or bounds clamping).
        let max_x_before_clamping = width_bound.saturating_sub(half_kernel + 1);
        let row_length = src.len();

        let mut last_processed_item = half_kernel;

        if ((half_kernel * 2 + 1) * CN < row_length) && ((max_x_before_clamping * CN) < row_length)
        {
            let data_section = src;
            // Pixels entering the window run exactly one full kernel ahead
            // of the pixels leaving it.
            let advanced_kernel_part = &data_section[(half_kernel * 2 + 1) * CN..];
            let section_length = max_x_before_clamping - half_kernel;
            let dst = &mut dst[half_kernel * CN..(half_kernel * CN + section_length * CN)];

            let dst_chunks = dst.as_chunks_mut::<CN>().0.iter_mut();
            let data_section_chunks = data_section.as_chunks::<CN>().0.iter();
            let advanced_kernel_part_chunks = advanced_kernel_part.as_chunks::<CN>().0.iter();
            for ((dst_chunk, src_previous), src_next) in dst_chunks
                .zip(data_section_chunks)
                .zip(advanced_kernel_part_chunks)
            {
                dst_chunk[0] = rounding_saturating_mul(weight0, weight);
                if CN > 1 {
                    dst_chunk[1] = rounding_saturating_mul(weight1, weight);
                }
                if CN > 2 {
                    dst_chunk[2] = rounding_saturating_mul(weight2, weight);
                }
                if CN == 4 {
                    dst_chunk[3] = rounding_saturating_mul(weight3, weight);
                }

                weight0 += src_next[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 += src_next[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 += src_next[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 += src_next[3].to_f32().unwrap();
                }

                weight0 -= src_previous[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 -= src_previous[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 -= src_previous[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 -= src_previous[3].to_f32().unwrap();
                }
            }

            last_processed_item = max_x_before_clamping;
        }

        // Right-edge region (and fallback when the fast path did not run):
        // same per-pixel clamped logic as the left edge.
        for x in last_processed_item..width as usize {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }
    }
}
409
410#[allow(clippy::too_many_arguments)]
411fn box_blur_vertical_pass_impl<T: Primitive>(
412 src: &[T],
413 src_stride: usize,
414 dst: &mut [T],
415 dst_stride: usize,
416 width: u32,
417 height: u32,
418 radius: usize,
419 n: usize,
420) {
421 assert!(width > 0, "Width must be sanitized before this method");
422 assert!(height > 0, "Height must be sanitized before this method");
423 test_radius_size(width as usize, radius);
424
425 let kernel_size = radius * 2 + 1;
426
427 let edge_count = ((kernel_size / 2) + 1) as f32;
428 let half_kernel = kernel_size / 2;
429
430 let weight = 1f32 / (radius * 2 + 1) as f32;
431
432 let buf_size = width as usize * n;
433
434 let buf_cap = buf_size;
435
436 let height_bound = height as usize - 1;
437
438 let mut buffer = vec![0f32; buf_cap];
445
446 for (x, (v, bf)) in src.iter().zip(buffer.iter_mut()).enumerate() {
447 let mut w = v.to_f32().unwrap() * edge_count;
448 for y in 1..=half_kernel {
449 let y_src_shift = y.min(height_bound) * src_stride;
450 w += src[y_src_shift + x].to_f32().unwrap();
451 }
452 *bf = w;
453 }
454
455 for (dst, y) in dst.chunks_exact_mut(dst_stride).zip(0..height as usize) {
456 let next = (y + half_kernel + 1).min(height_bound) * src_stride;
457 let previous = (y as i64 - half_kernel as i64).max(0) as usize * src_stride;
458
459 let next_row = &src[next..next + width as usize * n];
460 let previous_row = &src[previous..previous + width as usize * n];
461
462 for (((src_next, src_previous), buffer), dst) in next_row
463 .iter()
464 .zip(previous_row.iter())
465 .zip(buffer.iter_mut())
466 .zip(dst.iter_mut())
467 {
468 let mut weight0 = *buffer;
469
470 *dst = rounding_saturating_mul(weight0, weight);
471
472 weight0 += src_next.to_f32().unwrap();
473 weight0 -= src_previous.to_f32().unwrap();
474
475 *buffer = weight0;
476 }
477 }
478}
479
#[cfg(test)]
mod tests {
    use crate::{DynamicImage, GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Minimal deterministic 64-bit linear-congruential PRNG so the test
    // needs no external `rand` dependency.
    struct Rng {
        state: u64,
    }

    impl Rng {
        fn new(seed: u64) -> Self {
            Self { state: seed }
        }
        // Advances the LCG and returns the high 32 bits of the new state.
        fn next_u32(&mut self) -> u32 {
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            (self.state >> 32) as u32
        }

        fn next_u8(&mut self) -> u8 {
            (self.next_u32() % 256) as u8
        }

        // Uniform float in [a, b); the `+ 1.0` keeps `unit` strictly below 1.
        fn next_f32_in_range(&mut self, a: f32, b: f32) -> f32 {
            let u = self.next_u32();
            let unit = (u as f32) / (u32::MAX as f32 + 1.0);
            a + (b - a) * unit
        }
    }

    // Property test: blurring a constant-color image must leave every sample
    // unchanged, for random sizes/sigmas and all supported channel counts.
    // The RNG is seeded from the wall clock, so failures may not reproduce
    // run-to-run.
    #[test]
    fn test_box_blur() {
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let mut rng = Rng::new((now.as_millis() & 0xffff_ffff_ffff_ffff) as u64);
        for _ in 0..35 {
            let width = rng.next_u8();
            let height = rng.next_u8();
            let sigma = rng.next_f32_in_range(0., 100.);
            let px = rng.next_u8();
            let cn = rng.next_u8();
            // Degenerate inputs are handled by fast_blur's early returns;
            // skip them here so every iteration exercises the blur itself.
            if width == 0 || height == 0 || sigma <= 0. {
                continue;
            }
            // Pick one of the four supported channel layouts at random.
            match cn % 4 {
                0 => {
                    let vc = vec![px; width as usize * height as usize];
                    let image = DynamicImage::from(
                        GrayImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                1 => {
                    let vc = vec![px; width as usize * height as usize * 2];
                    let image = DynamicImage::from(
                        GrayAlphaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                2 => {
                    let vc = vec![px; width as usize * height as usize * 3];
                    let image = DynamicImage::from(
                        RgbImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                3 => {
                    let vc = vec![px; width as usize * height as usize * 4];
                    let image = DynamicImage::from(
                        RgbaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                // `cn % 4` only yields 0..=3; this arm exists because the
                // compiler cannot see that range on a `u8` match.
                _ => {}
            }
        }
    }
}