1use num_traits::Bounded;
2
3use crate::imageops::filter_1d::{SafeAdd, SafeMul};
4use crate::{ImageBuffer, Pixel, Primitive};
5
6#[must_use]
21pub fn fast_blur<P: Pixel>(
22 input_buffer: &ImageBuffer<P, Vec<P::Subpixel>>,
23 sigma: f32,
24) -> ImageBuffer<P, Vec<P::Subpixel>> {
25 let (width, height) = input_buffer.dimensions();
26
27 if width == 0 || height == 0 {
28 return input_buffer.clone();
29 }
30
31 let num_passes = 3;
32
33 let boxes = boxes_for_gauss(sigma, num_passes);
34 if boxes.is_empty() {
35 return input_buffer.clone();
36 }
37
38 let samples = input_buffer.as_flat_samples().samples;
39
40 let destination_size = match (width as usize)
41 .safe_mul(height as usize)
42 .and_then(|x| x.safe_mul(P::CHANNEL_COUNT as usize))
43 {
44 Ok(s) => s,
45 Err(_) => panic!("Width and height and channels count exceeded pointer size"),
46 };
47
48 let first_box = boxes[0];
49
50 let mut transient = vec![P::Subpixel::min_value(); destination_size];
51 let mut dst = vec![P::Subpixel::min_value(); destination_size];
52
53 let stride = width as usize * P::CHANNEL_COUNT as usize;
55
56 test_radius_size(width as usize, first_box);
58 test_radius_size(height as usize, first_box);
59
60 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
61 samples,
62 stride,
63 &mut transient,
64 stride,
65 width,
66 first_box,
67 );
68
69 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
70 &transient, stride, &mut dst, stride, width, height, first_box,
71 );
72
73 for &box_container in boxes.iter().skip(1) {
74 test_radius_size(width as usize, box_container);
76 test_radius_size(height as usize, box_container);
77
78 box_blur_horizontal_pass_strategy::<P, P::Subpixel>(
79 &dst,
80 stride,
81 &mut transient,
82 stride,
83 width,
84 box_container,
85 );
86
87 box_blur_vertical_pass_strategy::<P, P::Subpixel>(
88 &transient,
89 stride,
90 &mut dst,
91 stride,
92 width,
93 height,
94 box_container,
95 );
96 }
97
98 let mut buffer = ImageBuffer::from_raw(width, height, dst).unwrap();
99 buffer.copy_color_space_from(input_buffer);
100 buffer
101}
102
103#[inline]
104fn test_radius_size(bound: usize, radius: usize) {
105 match bound.safe_add(radius) {
106 Ok(_) => {}
107 Err(_) => panic!("Radius overflowed maximum possible size"),
108 }
109}
110
/// Computes the radii of `n` successive box blurs that together approximate a
/// Gaussian blur of standard deviation `sigma`.
///
/// Uses the standard "boxes for Gauss" derivation: pick the ideal averaging
/// width, split the passes between two adjacent odd widths, then convert each
/// width to an odd radius.
fn boxes_for_gauss(sigma: f32, n: usize) -> Vec<usize> {
    // Ideal averaging-filter width for `n` passes.
    let w_ideal = f32::sqrt((12.0 * sigma.powi(2) / (n as f32)) + 1.0);

    // Round down to the nearest odd width; `upper` is the next odd width.
    let mut lower = w_ideal.floor();
    if lower % 2.0 == 0.0 {
        lower -= 1.0;
    }
    let upper = lower + 2.0;

    // Number of leading passes that should use the narrower box.
    let threshold =
        f32::round(0.25 * (n as f32) * (lower + 3.0) - 3.0 * sigma.powi(2) * (lower + 1.0).recip())
            as usize;

    (0..n)
        .map(|pass| {
            let box_width = if pass < threshold { lower } else { upper } as usize;
            // Width -> radius, then force the radius odd: `| 1` bumps even
            // values up by one and leaves odd values untouched.
            (box_width.saturating_sub(1) / 2) | 1
        })
        .collect()
}
128
/// Rounds `x` up to the next odd number; odd inputs pass through unchanged.
#[inline]
fn ceil_to_odd(x: usize) -> usize {
    // Setting the lowest bit maps every even value to the next odd value and
    // is the identity on odd values.
    x | 1
}
137
138#[inline]
139#[allow(clippy::manual_clamp)]
140fn rounding_saturating_mul<T: Primitive>(v: f32, w: f32) -> T {
141 if T::DEFAULT_MAX_VALUE.to_f32().unwrap() != 1.0 {
144 T::from(
145 (v * w)
146 .round()
147 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
148 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
149 )
150 .unwrap()
151 } else {
152 T::from(
153 (v * w)
154 .min(T::DEFAULT_MAX_VALUE.to_f32().unwrap())
155 .max(T::DEFAULT_MIN_VALUE.to_f32().unwrap()),
156 )
157 .unwrap()
158 }
159}
160
161fn box_blur_horizontal_pass_strategy<T, P: Primitive>(
162 src: &[P],
163 src_stride: usize,
164 dst: &mut [P],
165 dst_stride: usize,
166 width: u32,
167 radius: usize,
168) where
169 T: Pixel,
170{
171 if T::CHANNEL_COUNT == 1 {
172 box_blur_horizontal_pass_impl::<P, 1>(src, src_stride, dst, dst_stride, width, radius);
173 } else if T::CHANNEL_COUNT == 2 {
174 box_blur_horizontal_pass_impl::<P, 2>(src, src_stride, dst, dst_stride, width, radius);
175 } else if T::CHANNEL_COUNT == 3 {
176 box_blur_horizontal_pass_impl::<P, 3>(src, src_stride, dst, dst_stride, width, radius);
177 } else if T::CHANNEL_COUNT == 4 {
178 box_blur_horizontal_pass_impl::<P, 4>(src, src_stride, dst, dst_stride, width, radius);
179 } else {
180 unimplemented!("More than 4 channels is not yet implemented");
181 }
182}
183
184fn box_blur_vertical_pass_strategy<T: Pixel, P: Primitive>(
185 src: &[P],
186 src_stride: usize,
187 dst: &mut [P],
188 dst_stride: usize,
189 width: u32,
190 height: u32,
191 radius: usize,
192) {
193 box_blur_vertical_pass_impl::<P>(
194 src,
195 src_stride,
196 dst,
197 dst_stride,
198 width,
199 height,
200 radius,
201 T::CHANNEL_COUNT as usize,
202 );
203}
204
/// Horizontal box-blur pass over interleaved samples, specialized on the
/// channel count `CN`.
///
/// Each row maintains one running sum per channel (`weight0..weight3`) over a
/// sliding window of `2 * radius + 1` pixels. Window indices are clamped at
/// the row edges via `min(width_bound)` / `max(0)`, i.e. the first and last
/// pixel are repeated beyond the borders.
///
/// `src_stride` / `dst_stride` are row strides in samples; rows are visited
/// via `chunks_exact`, so any trailing partial row is skipped.
fn box_blur_horizontal_pass_impl<T, const CN: usize>(
    src: &[T],
    src_stride: usize,
    dst: &mut [T],
    dst_stride: usize,
    width: u32,
    radius: usize,
) where
    T: Primitive,
{
    assert!(width > 0, "Width must be sanitized before this method");
    test_radius_size(width as usize, radius);

    let kernel_size = radius * 2 + 1;
    // Weight given to the clamped left-edge pixel when seeding the window:
    // it stands in for itself plus every (virtual) pixel left of the row.
    let edge_count = ((kernel_size / 2) + 1) as f32;
    let half_kernel = kernel_size / 2;

    // Box-filter normalization factor (1 / window length).
    let weight = 1f32 / (radius * 2 + 1) as f32;

    let width_bound = width as usize - 1;

    // Each row is blurred independently.
    for (dst, src) in dst
        .chunks_exact_mut(dst_stride)
        .zip(src.chunks_exact(src_stride))
    {
        let mut weight1: f32 = 0.;
        let mut weight2: f32 = 0.;
        let mut weight3: f32 = 0.;

        // First pixel of the row, counted `edge_count` times (edge clamp).
        let chunk0 = &src[..CN];

        let mut weight0 = chunk0[0].to_f32().unwrap() * edge_count;
        if CN > 1 {
            weight1 = chunk0[1].to_f32().unwrap() * edge_count;
        }
        if CN > 2 {
            weight2 = chunk0[2].to_f32().unwrap() * edge_count;
        }
        if CN == 4 {
            weight3 = chunk0[3].to_f32().unwrap() * edge_count;
        }

        // Seed the rest of the initial window (clamped to the row end for
        // very narrow images).
        for x in 1..=half_kernel {
            let px = x.min(width_bound) * CN;
            let chunk0 = &src[px..px + CN];
            weight0 += chunk0[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += chunk0[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += chunk0[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += chunk0[3].to_f32().unwrap();
            }
        }

        // Left-edge region: both window ends may need clamping, so indices
        // are computed per pixel.
        for x in 0..half_kernel.min(width as usize) {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;

            // Emit the current average, then slide the window right by one.
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }

        // Interior fast path: for x in [half_kernel, max_x_before_clamping)
        // neither window end needs clamping, so the incoming and outgoing
        // pixels can be walked with plain chunk iterators (no per-pixel
        // index math or bounds clamping).
        let max_x_before_clamping = width_bound.saturating_sub(half_kernel + 1);
        let row_length = src.len();

        let mut last_processed_item = half_kernel;

        if ((half_kernel * 2 + 1) * CN < row_length) && ((max_x_before_clamping * CN) < row_length)
        {
            let data_section = src;
            // Pixels entering the window run exactly one full kernel ahead
            // of the pixels leaving it.
            let advanced_kernel_part = &data_section[(half_kernel * 2 + 1) * CN..];
            let section_length = max_x_before_clamping - half_kernel;
            let dst = &mut dst[half_kernel * CN..(half_kernel * CN + section_length * CN)];

            let dst_chunks = dst.as_chunks_mut::<CN>().0.iter_mut();
            let data_section_chunks = data_section.as_chunks::<CN>().0.iter();
            let advanced_kernel_part_chunks = advanced_kernel_part.as_chunks::<CN>().0.iter();
            for ((dst_chunk, src_previous), src_next) in dst_chunks
                .zip(data_section_chunks)
                .zip(advanced_kernel_part_chunks)
            {
                dst_chunk[0] = rounding_saturating_mul(weight0, weight);
                if CN > 1 {
                    dst_chunk[1] = rounding_saturating_mul(weight1, weight);
                }
                if CN > 2 {
                    dst_chunk[2] = rounding_saturating_mul(weight2, weight);
                }
                if CN == 4 {
                    dst_chunk[3] = rounding_saturating_mul(weight3, weight);
                }

                weight0 += src_next[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 += src_next[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 += src_next[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 += src_next[3].to_f32().unwrap();
                }

                weight0 -= src_previous[0].to_f32().unwrap();
                if CN > 1 {
                    weight1 -= src_previous[1].to_f32().unwrap();
                }
                if CN > 2 {
                    weight2 -= src_previous[2].to_f32().unwrap();
                }
                if CN == 4 {
                    weight3 -= src_previous[3].to_f32().unwrap();
                }
            }

            last_processed_item = max_x_before_clamping;
        }

        // Right-edge region (and fallback when the fast path did not run):
        // same per-pixel clamped logic as the left edge.
        for x in last_processed_item..width as usize {
            let next = (x + half_kernel + 1).min(width_bound) * CN;
            let previous = (x as i64 - half_kernel as i64).max(0) as usize * CN;
            let dst_chunk = &mut dst[x * CN..x * CN + CN];
            dst_chunk[0] = rounding_saturating_mul(weight0, weight);
            if CN > 1 {
                dst_chunk[1] = rounding_saturating_mul(weight1, weight);
            }
            if CN > 2 {
                dst_chunk[2] = rounding_saturating_mul(weight2, weight);
            }
            if CN == 4 {
                dst_chunk[3] = rounding_saturating_mul(weight3, weight);
            }

            let next_chunk = &src[next..next + CN];
            let previous_chunk = &src[previous..previous + CN];

            weight0 += next_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 += next_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 += next_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 += next_chunk[3].to_f32().unwrap();
            }

            weight0 -= previous_chunk[0].to_f32().unwrap();
            if CN > 1 {
                weight1 -= previous_chunk[1].to_f32().unwrap();
            }
            if CN > 2 {
                weight2 -= previous_chunk[2].to_f32().unwrap();
            }
            if CN == 4 {
                weight3 -= previous_chunk[3].to_f32().unwrap();
            }
        }
    }
}
409
410#[allow(clippy::too_many_arguments)]
411fn box_blur_vertical_pass_impl<T: Primitive>(
412 src: &[T],
413 src_stride: usize,
414 dst: &mut [T],
415 dst_stride: usize,
416 width: u32,
417 height: u32,
418 radius: usize,
419 n: usize,
420) {
421 assert!(width > 0, "Width must be sanitized before this method");
422 assert!(height > 0, "Height must be sanitized before this method");
423 test_radius_size(width as usize, radius);
424
425 let kernel_size = radius * 2 + 1;
426
427 let edge_count = ((kernel_size / 2) + 1) as f32;
428 let half_kernel = kernel_size / 2;
429
430 let weight = 1f32 / (radius * 2 + 1) as f32;
431
432 let buf_size = width as usize * n;
433
434 let buf_cap = buf_size;
435
436 let height_bound = height as usize - 1;
437
438 let mut buffer = vec![0f32; buf_cap];
445
446 for (x, (v, bf)) in src.iter().zip(buffer.iter_mut()).enumerate() {
447 let mut w = v.to_f32().unwrap() * edge_count;
448 for y in 1..=half_kernel {
449 let y_src_shift = y.min(height_bound) * src_stride;
450 w += src[y_src_shift + x].to_f32().unwrap();
451 }
452 *bf = w;
453 }
454
455 for (dst, y) in dst.chunks_exact_mut(dst_stride).zip(0..height as usize) {
456 let next = (y + half_kernel + 1).min(height_bound) * src_stride;
457 let previous = (y as i64 - half_kernel as i64).max(0) as usize * src_stride;
458
459 let next_row = &src[next..next + width as usize * n];
460 let previous_row = &src[previous..previous + width as usize * n];
461
462 for (((src_next, src_previous), buffer), dst) in next_row
463 .iter()
464 .zip(previous_row.iter())
465 .zip(buffer.iter_mut())
466 .zip(dst.iter_mut())
467 {
468 let mut weight0 = *buffer;
469
470 *dst = rounding_saturating_mul(weight0, weight);
471
472 weight0 += src_next.to_f32().unwrap();
473 weight0 -= src_previous.to_f32().unwrap();
474
475 *buffer = weight0;
476 }
477 }
478}
479
#[cfg(test)]
mod tests {
    use crate::{DynamicImage, GrayAlphaImage, GrayImage, RgbImage, RgbaImage};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Minimal deterministic 64-bit linear-congruential PRNG so the test
    // needs no external `rand` dependency.
    struct Rng {
        state: u64,
    }

    impl Rng {
        fn new(seed: u64) -> Self {
            Self { state: seed }
        }
        // Advances the LCG and returns the high 32 bits of the new state.
        fn next_u32(&mut self) -> u32 {
            self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
            (self.state >> 32) as u32
        }

        fn next_u8(&mut self) -> u8 {
            (self.next_u32() % 256) as u8
        }

        // Uniform float in [a, b); the `+ 1.0` keeps `unit` strictly below 1.
        fn next_f32_in_range(&mut self, a: f32, b: f32) -> f32 {
            let u = self.next_u32();
            let unit = (u as f32) / (u32::MAX as f32 + 1.0);
            a + (b - a) * unit
        }
    }

    // Property test: blurring a constant-color image must leave every sample
    // unchanged, for random sizes/sigmas and all supported channel counts.
    // The RNG is seeded from the wall clock, so failures may not reproduce
    // run-to-run.
    #[test]
    fn test_box_blur() {
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let mut rng = Rng::new((now.as_millis() & 0xffff_ffff_ffff_ffff) as u64);
        for _ in 0..35 {
            let width = rng.next_u8();
            let height = rng.next_u8();
            let sigma = rng.next_f32_in_range(0., 100.);
            let px = rng.next_u8();
            let cn = rng.next_u8();
            // Degenerate inputs are handled by fast_blur's early returns;
            // skip them here so every iteration exercises the blur itself.
            if width == 0 || height == 0 || sigma <= 0. {
                continue;
            }
            // Pick one of the four supported channel layouts at random.
            match cn % 4 {
                0 => {
                    let vc = vec![px; width as usize * height as usize];
                    let image = DynamicImage::from(
                        GrayImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                1 => {
                    let vc = vec![px; width as usize * height as usize * 2];
                    let image = DynamicImage::from(
                        GrayAlphaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                2 => {
                    let vc = vec![px; width as usize * height as usize * 3];
                    let image = DynamicImage::from(
                        RgbImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                3 => {
                    let vc = vec![px; width as usize * height as usize * 4];
                    let image = DynamicImage::from(
                        RgbaImage::from_vec(u32::from(width), u32::from(height), vc).unwrap(),
                    );
                    let res = image.fast_blur(sigma);
                    for clr in res.as_bytes() {
                        assert_eq!(*clr, px);
                    }
                }
                // `cn % 4` only yields 0..=3; this arm exists because the
                // compiler cannot see that range on a `u8` match.
                _ => {}
            }
        }
    }
}