1use alloc::{borrow::Cow, sync::Arc};
2use core::{
3 ops::{DerefMut, Range},
4 sync::atomic::{AtomicBool, Ordering},
5};
6use std::thread::{self, ThreadId};
7
8use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};
9use bevy_ecs::resource::Resource;
10use bevy_ecs::system::{Res, ResMut};
11use bevy_platform::time::Instant;
12use std::sync::Mutex;
13use wgpu::{
14 Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode,
15 PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, RenderPass,
16};
17
18use crate::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};
19
20use super::RecordDiagnostics;
21
/// Number of entries in each frame's timestamp query set.
///
/// The timestamp region of the resolve buffer is
/// `MAX_TIMESTAMP_QUERIES * TIMESTAMP_SIZE` bytes long and the pipeline
/// statistics region starts directly after it.
// NOTE(review): wgpu appears to require query-resolve destination offsets to be
// aligned to 256 bytes, which would mean this value has to stay a multiple of
// 32 (= 256 / TIMESTAMP_SIZE) — confirm against `wgpu::QUERY_RESOLVE_BUFFER_ALIGNMENT`.
const MAX_TIMESTAMP_QUERIES: u32 = 256;
/// Number of entries in each frame's pipeline statistics query set.
const MAX_PIPELINE_STATISTICS: u32 = 128;

/// Size in bytes of one resolved timestamp (read back as a single `u64`).
const TIMESTAMP_SIZE: u64 = 8;
/// Size in bytes of one resolved pipeline statistics entry (read back as five `u64` counters).
const PIPELINE_STATISTICS_SIZE: u64 = 40;
28
/// State owned by [`DiagnosticsRecorder`], kept inside a [`WgpuWrapper`].
struct DiagnosticsRecorderInternal {
    /// Value returned by `RenderQueue::get_timestamp_period` at construction;
    /// used to scale GPU timestamp differences.
    timestamp_period_ns: f32,
    /// Device features captured at construction and passed to every new [`FrameData`].
    features: Features,
    /// Frame that is currently being recorded. Wrapped in a mutex so spans can
    /// be recorded through a shared reference.
    current_frame: Mutex<FrameData>,
    /// Frames handed over by `finish_frame` whose callback has not run yet.
    submitted_frames: Vec<FrameData>,
    /// Frames whose callback has run; `finish_frame` reuses them before creating new ones.
    finished_frames: Vec<FrameData>,
    /// Tracy GPU context, cloned into every [`FrameData`].
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
38
/// Resource that records render diagnostics (CPU time, GPU timestamps and
/// pipeline statistics) for spans and passes, one frame at a time.
#[derive(Resource)]
pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);
43
44impl DiagnosticsRecorder {
45 pub fn new(
47 adapter_info: &RenderAdapterInfo,
48 device: &RenderDevice,
49 queue: &RenderQueue,
50 ) -> DiagnosticsRecorder {
51 let features = device.features();
52
53 #[cfg(feature = "tracing-tracy")]
54 let tracy_gpu_context =
55 super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);
56 let _ = adapter_info; DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {
59 timestamp_period_ns: queue.get_timestamp_period(),
60 features,
61 current_frame: Mutex::new(FrameData::new(
62 device,
63 features,
64 #[cfg(feature = "tracing-tracy")]
65 tracy_gpu_context.clone(),
66 )),
67 submitted_frames: Vec::new(),
68 finished_frames: Vec::new(),
69 #[cfg(feature = "tracing-tracy")]
70 tracy_gpu_context,
71 }))
72 }
73
74 fn current_frame_mut(&mut self) -> &mut FrameData {
75 self.0.current_frame.get_mut().expect("lock poisoned")
76 }
77
78 fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {
79 self.0.current_frame.lock().expect("lock poisoned")
80 }
81
82 pub fn begin_frame(&mut self) {
84 let internal = &mut self.0;
85 let mut idx = 0;
86 while idx < internal.submitted_frames.len() {
87 let timestamp = internal.timestamp_period_ns;
88 if internal.submitted_frames[idx].run_mapped_callback(timestamp) {
89 let removed = internal.submitted_frames.swap_remove(idx);
90 internal.finished_frames.push(removed);
91 } else {
92 idx += 1;
93 }
94 }
95
96 self.current_frame_mut().begin();
97 }
98
99 pub fn resolve(&mut self, encoder: &mut CommandEncoder) {
103 self.current_frame_mut().resolve(encoder);
104 }
105
106 pub fn finish_frame(
113 &mut self,
114 device: &RenderDevice,
115 callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,
116 ) {
117 #[cfg(feature = "tracing-tracy")]
118 let tracy_gpu_context = self.0.tracy_gpu_context.clone();
119
120 let internal = &mut self.0;
121 internal
122 .current_frame
123 .get_mut()
124 .expect("lock poisoned")
125 .finish(callback);
126
127 let new_frame = match internal.finished_frames.pop() {
129 Some(frame) => frame,
130 None => FrameData::new(
131 device,
132 internal.features,
133 #[cfg(feature = "tracing-tracy")]
134 tracy_gpu_context,
135 ),
136 };
137
138 let old_frame = core::mem::replace(
139 internal.current_frame.get_mut().expect("lock poisoned"),
140 new_frame,
141 );
142 internal.submitted_frames.push(old_frame);
143 }
144}
145
146impl RecordDiagnostics for DiagnosticsRecorder {
147 fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {
148 self.current_frame_lock()
149 .begin_time_span(encoder, span_name);
150 }
151
152 fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
153 self.current_frame_lock().end_time_span(encoder);
154 }
155
156 fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {
157 self.current_frame_lock().begin_pass(pass, span_name);
158 }
159
160 fn end_pass_span<P: Pass>(&self, pass: &mut P) {
161 self.current_frame_lock().end_pass(pass);
162 }
163}
164
/// Bookkeeping for one recorded span (a time span or a render/compute pass).
struct SpanRecord {
    /// Thread that opened the span; spans are nested and closed per thread.
    thread_id: ThreadId,
    /// Range into `FrameData::path_components` holding the span's full path:
    /// the names of its ancestors followed by its own name.
    path_range: Range<usize>,
    /// `Some` for spans opened by `begin_pass`, `None` for spans opened by `begin_time_span`.
    pass_kind: Option<PassKind>,
    /// Index of the timestamp query written when the span began, if one was written.
    begin_timestamp_index: Option<u32>,
    /// Index of the timestamp query written when the span ended, if one was written.
    end_timestamp_index: Option<u32>,
    /// CPU instant taken when the span began.
    begin_instant: Option<Instant>,
    /// CPU instant taken when the span ended.
    end_instant: Option<Instant>,
    /// Index of the pipeline statistics query started for the span, if any.
    pipeline_statistics_index: Option<u32>,
}
175
/// GPU resources and span bookkeeping used to record one frame of diagnostics.
struct FrameData {
    /// Timestamp query set; `None` when `Features::TIMESTAMP_QUERY` is missing.
    timestamps_query_set: Option<QuerySet>,
    /// Number of timestamp queries written so far this frame.
    num_timestamps: u32,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_PASSES` is available.
    supports_timestamps_inside_passes: bool,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` is available.
    supports_timestamps_inside_encoders: bool,
    /// Pipeline statistics query set; `None` when
    /// `Features::PIPELINE_STATISTICS_QUERY` is missing.
    pipeline_statistics_query_set: Option<QuerySet>,
    /// Number of pipeline statistics queries started so far this frame.
    num_pipeline_statistics: u32,
    /// Size in bytes of both `resolve_buffer` and `read_buffer`.
    buffer_size: u64,
    /// Byte offset inside the buffers at which pipeline statistics begin
    /// (timestamps are stored before it).
    pipeline_statistics_buffer_offset: u64,
    /// Buffer the query sets are resolved into; `None` when no query set exists.
    resolve_buffer: Option<Buffer>,
    /// Mappable buffer that `resolve_buffer` is copied into; `None` when no query set exists.
    read_buffer: Option<Buffer>,
    /// Flat storage for span paths; every `SpanRecord::path_range` indexes into it.
    path_components: Vec<Cow<'static, str>>,
    /// Spans that have been opened but not closed yet, for all threads.
    open_spans: Vec<SpanRecord>,
    /// Spans that have been closed this frame.
    closed_spans: Vec<SpanRecord>,
    /// Set to `true` by the `map_async` callback once `read_buffer` is mapped.
    is_mapped: Arc<AtomicBool>,
    /// Callback stored by `finish` and invoked by `run_mapped_callback`.
    callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,
    /// Tracy GPU context that GPU spans are reported to.
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
195
196impl FrameData {
197 fn new(
198 device: &RenderDevice,
199 features: Features,
200 #[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext,
201 ) -> FrameData {
202 let wgpu_device = device.wgpu_device();
203 let mut buffer_size = 0;
204
205 let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {
206 buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;
207 Some(wgpu_device.create_query_set(&QuerySetDescriptor {
208 label: Some("timestamps_query_set"),
209 ty: QueryType::Timestamp,
210 count: MAX_TIMESTAMP_QUERIES,
211 }))
212 } else {
213 None
214 };
215
216 let pipeline_statistics_buffer_offset = buffer_size;
217
218 let pipeline_statistics_query_set =
219 if features.contains(Features::PIPELINE_STATISTICS_QUERY) {
220 buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;
221 Some(wgpu_device.create_query_set(&QuerySetDescriptor {
222 label: Some("pipeline_statistics_query_set"),
223 ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),
224 count: MAX_PIPELINE_STATISTICS,
225 }))
226 } else {
227 None
228 };
229
230 let (resolve_buffer, read_buffer) = if buffer_size > 0 {
231 let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {
232 label: Some("render_statistics_resolve_buffer"),
233 size: buffer_size,
234 usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
235 mapped_at_creation: false,
236 });
237 let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {
238 label: Some("render_statistics_read_buffer"),
239 size: buffer_size,
240 usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
241 mapped_at_creation: false,
242 });
243 (Some(resolve_buffer), Some(read_buffer))
244 } else {
245 (None, None)
246 };
247
248 FrameData {
249 timestamps_query_set,
250 num_timestamps: 0,
251 supports_timestamps_inside_passes: features
252 .contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),
253 supports_timestamps_inside_encoders: features
254 .contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),
255 pipeline_statistics_query_set,
256 num_pipeline_statistics: 0,
257 buffer_size,
258 pipeline_statistics_buffer_offset,
259 resolve_buffer,
260 read_buffer,
261 path_components: Vec::new(),
262 open_spans: Vec::new(),
263 closed_spans: Vec::new(),
264 is_mapped: Arc::new(AtomicBool::new(false)),
265 callback: None,
266 #[cfg(feature = "tracing-tracy")]
267 tracy_gpu_context,
268 }
269 }
270
271 fn begin(&mut self) {
272 self.num_timestamps = 0;
273 self.num_pipeline_statistics = 0;
274 self.path_components.clear();
275 self.open_spans.clear();
276 self.closed_spans.clear();
277 }
278
279 fn write_timestamp(
280 &mut self,
281 encoder: &mut impl WriteTimestamp,
282 is_inside_pass: bool,
283 ) -> Option<u32> {
284 if !self.supports_timestamps_inside_encoders {
286 return None;
287 }
288
289 if is_inside_pass && !self.supports_timestamps_inside_passes {
290 return None;
291 }
292
293 if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {
294 return None;
295 }
296
297 let set = self.timestamps_query_set.as_ref()?;
298 let index = self.num_timestamps;
299 encoder.write_timestamp(set, index);
300 self.num_timestamps += 1;
301 Some(index)
302 }
303
304 fn write_pipeline_statistics(
305 &mut self,
306 encoder: &mut impl WritePipelineStatistics,
307 ) -> Option<u32> {
308 if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {
309 return None;
310 }
311
312 let set = self.pipeline_statistics_query_set.as_ref()?;
313 let index = self.num_pipeline_statistics;
314 encoder.begin_pipeline_statistics_query(set, index);
315 self.num_pipeline_statistics += 1;
316 Some(index)
317 }
318
319 fn open_span(
320 &mut self,
321 pass_kind: Option<PassKind>,
322 name: Cow<'static, str>,
323 ) -> &mut SpanRecord {
324 let thread_id = thread::current().id();
325
326 let parent = self
327 .open_spans
328 .iter()
329 .filter(|v| v.thread_id == thread_id)
330 .next_back();
331
332 let path_range = match &parent {
333 Some(parent) if parent.path_range.end == self.path_components.len() => {
334 parent.path_range.start..parent.path_range.end + 1
335 }
336 Some(parent) => {
337 self.path_components
338 .extend_from_within(parent.path_range.clone());
339 self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1
340 }
341 None => self.path_components.len()..self.path_components.len() + 1,
342 };
343
344 self.path_components.push(name);
345
346 self.open_spans.push(SpanRecord {
347 thread_id,
348 path_range,
349 pass_kind,
350 begin_timestamp_index: None,
351 end_timestamp_index: None,
352 begin_instant: None,
353 end_instant: None,
354 pipeline_statistics_index: None,
355 });
356
357 self.open_spans.last_mut().unwrap()
358 }
359
360 fn close_span(&mut self) -> &mut SpanRecord {
361 let thread_id = thread::current().id();
362
363 let iter = self.open_spans.iter();
364 let (index, _) = iter
365 .enumerate()
366 .filter(|(_, v)| v.thread_id == thread_id)
367 .next_back()
368 .unwrap();
369
370 let span = self.open_spans.swap_remove(index);
371 self.closed_spans.push(span);
372 self.closed_spans.last_mut().unwrap()
373 }
374
375 fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {
376 let begin_instant = Instant::now();
377 let begin_timestamp_index = self.write_timestamp(encoder, false);
378
379 let span = self.open_span(None, name);
380 span.begin_instant = Some(begin_instant);
381 span.begin_timestamp_index = begin_timestamp_index;
382 }
383
384 fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {
385 let end_timestamp_index = self.write_timestamp(encoder, false);
386
387 let span = self.close_span();
388 span.end_timestamp_index = end_timestamp_index;
389 span.end_instant = Some(Instant::now());
390 }
391
392 fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {
393 let begin_instant = Instant::now();
394
395 let begin_timestamp_index = self.write_timestamp(pass, true);
396 let pipeline_statistics_index = self.write_pipeline_statistics(pass);
397
398 let span = self.open_span(Some(P::KIND), name);
399 span.begin_instant = Some(begin_instant);
400 span.begin_timestamp_index = begin_timestamp_index;
401 span.pipeline_statistics_index = pipeline_statistics_index;
402 }
403
404 fn end_pass(&mut self, pass: &mut impl Pass) {
405 let end_timestamp_index = self.write_timestamp(pass, true);
406
407 let span = self.close_span();
408 span.end_timestamp_index = end_timestamp_index;
409
410 if span.pipeline_statistics_index.is_some() {
411 pass.end_pipeline_statistics_query();
412 }
413
414 span.end_instant = Some(Instant::now());
415 }
416
417 fn resolve(&mut self, encoder: &mut CommandEncoder) {
418 let Some(resolve_buffer) = &self.resolve_buffer else {
419 return;
420 };
421
422 match &self.timestamps_query_set {
423 Some(set) if self.num_timestamps > 0 => {
424 encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);
425 }
426 _ => {}
427 }
428
429 match &self.pipeline_statistics_query_set {
430 Some(set) if self.num_pipeline_statistics > 0 => {
431 encoder.resolve_query_set(
432 set,
433 0..self.num_pipeline_statistics,
434 resolve_buffer,
435 self.pipeline_statistics_buffer_offset,
436 );
437 }
438 _ => {}
439 }
440
441 let Some(read_buffer) = &self.read_buffer else {
442 return;
443 };
444
445 encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);
446 }
447
448 fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {
449 DiagnosticPath::from_components(
450 core::iter::once("render")
451 .chain(self.path_components[range.clone()].iter().map(|v| &**v))
452 .chain(core::iter::once(field)),
453 )
454 }
455
456 fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {
457 let Some(read_buffer) = &self.read_buffer else {
458 let mut diagnostics = Vec::new();
461
462 for span in &self.closed_spans {
463 if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
464 diagnostics.push(RenderDiagnostic {
465 path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
466 suffix: "ms",
467 value: (end - begin).as_secs_f64() * 1000.0,
468 });
469 }
470 }
471
472 callback(RenderDiagnostics(diagnostics));
473 return;
474 };
475
476 self.callback = Some(Box::new(callback));
477
478 let is_mapped = self.is_mapped.clone();
479 read_buffer.slice(..).map_async(MapMode::Read, move |res| {
480 if let Err(e) = res {
481 tracing::warn!("Failed to download render statistics buffer: {e}");
482 return;
483 }
484
485 is_mapped.store(true, Ordering::Release);
486 });
487 }
488
489 fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {
491 let Some(read_buffer) = &self.read_buffer else {
492 return true;
493 };
494 if !self.is_mapped.load(Ordering::Acquire) {
495 return false;
497 }
498 let Some(callback) = self.callback.take() else {
499 return true;
500 };
501
502 let data = read_buffer.slice(..).get_mapped_range();
503
504 let timestamps = data[..(self.num_timestamps * 8) as usize]
505 .chunks(8)
506 .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
507 .collect::<Vec<u64>>();
508
509 let start = self.pipeline_statistics_buffer_offset as usize;
510 let len = (self.num_pipeline_statistics as usize) * 40;
511 let pipeline_statistics = data[start..start + len]
512 .chunks(8)
513 .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
514 .collect::<Vec<u64>>();
515
516 let mut diagnostics = Vec::new();
517
518 for span in &self.closed_spans {
519 if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
520 diagnostics.push(RenderDiagnostic {
521 path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
522 suffix: "ms",
523 value: (end - begin).as_secs_f64() * 1000.0,
524 });
525 }
526
527 if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)
528 {
529 let begin = timestamps[begin as usize] as f64;
530 let end = timestamps[end as usize] as f64;
531 let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;
532
533 #[cfg(feature = "tracing-tracy")]
534 {
535 let name = &self.path_components[span.path_range.clone()].join("/");
539 let mut tracy_gpu_span =
540 self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();
541 tracy_gpu_span.end_zone();
542 tracy_gpu_span.upload_timestamp_start(begin as i64);
543 tracy_gpu_span.upload_timestamp_end(end as i64);
544 }
545
546 diagnostics.push(RenderDiagnostic {
547 path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),
548 suffix: "ms",
549 value,
550 });
551 }
552
553 if let Some(index) = span.pipeline_statistics_index {
554 let index = (index as usize) * 5;
555
556 if span.pass_kind == Some(PassKind::Render) {
557 diagnostics.push(RenderDiagnostic {
558 path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),
559 suffix: "",
560 value: pipeline_statistics[index] as f64,
561 });
562
563 diagnostics.push(RenderDiagnostic {
564 path: self.diagnostic_path(&span.path_range, "clipper_invocations"),
565 suffix: "",
566 value: pipeline_statistics[index + 1] as f64,
567 });
568
569 diagnostics.push(RenderDiagnostic {
570 path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),
571 suffix: "",
572 value: pipeline_statistics[index + 2] as f64,
573 });
574
575 diagnostics.push(RenderDiagnostic {
576 path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),
577 suffix: "",
578 value: pipeline_statistics[index + 3] as f64,
579 });
580 }
581
582 if span.pass_kind == Some(PassKind::Compute) {
583 diagnostics.push(RenderDiagnostic {
584 path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),
585 suffix: "",
586 value: pipeline_statistics[index + 4] as f64,
587 });
588 }
589 }
590 }
591
592 callback(RenderDiagnostics(diagnostics));
593
594 drop(data);
595 read_buffer.unmap();
596 self.is_mapped.store(false, Ordering::Release);
597
598 true
599 }
600}
601
/// The render diagnostics recorded for one frame, as passed to the callback
/// given to `DiagnosticsRecorder::finish_frame`.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnostics(Vec<RenderDiagnostic>);
605
/// A single recorded render measurement that has not yet been stored in the
/// [`DiagnosticsStore`].
#[derive(Debug, Clone, Resource)]
pub struct RenderDiagnostic {
    /// Path under which the measurement is stored.
    pub path: DiagnosticPath,
    /// Suffix given to the diagnostic when it is first registered (for example `"ms"`).
    pub suffix: &'static str,
    /// The measured value.
    pub value: f64,
}
613
/// Shared slot holding [`RenderDiagnostics`] until [`sync_diagnostics`] takes
/// them out and stores them in the [`DiagnosticsStore`].
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);
621
622pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {
624 let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else {
625 return;
626 };
627
628 let time = Instant::now();
629
630 for diagnostic in &diagnostics.0 {
631 if store.get(&diagnostic.path).is_none() {
632 store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));
633 }
634
635 store
636 .get_mut(&diagnostic.path)
637 .unwrap()
638 .add_measurement(DiagnosticMeasurement {
639 time,
640 value: diagnostic.value,
641 });
642 }
643}
644
/// Common interface over the wgpu objects that can write a timestamp into a
/// [`QuerySet`] (implemented below for command encoders and passes).
pub trait WriteTimestamp {
    /// Writes a timestamp into entry `index` of `query_set`.
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);
}
648
/// Forwards to `CommandEncoder::write_timestamp`.
impl WriteTimestamp for CommandEncoder {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        // Called through the type path rather than `self.write_timestamp(..)`;
        // presumably to make sure the wgpu method, not this trait method, is invoked.
        CommandEncoder::write_timestamp(self, query_set, index);
    }
}
654
/// Forwards to `RenderPass::write_timestamp`.
impl WriteTimestamp for RenderPass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        // Called through the type path rather than `self.write_timestamp(..)`;
        // presumably to make sure the wgpu method, not this trait method, is invoked.
        RenderPass::write_timestamp(self, query_set, index);
    }
}
660
/// Forwards to `ComputePass::write_timestamp`.
impl WriteTimestamp for ComputePass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        // Called through the type path rather than `self.write_timestamp(..)`;
        // presumably to make sure the wgpu method, not this trait method, is invoked.
        ComputePass::write_timestamp(self, query_set, index);
    }
}
666
/// Common interface over the wgpu passes that can run pipeline statistics
/// queries (implemented below for render and compute passes).
pub trait WritePipelineStatistics {
    /// Begins a pipeline statistics query stored at entry `index` of `query_set`.
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);

    /// Ends the pipeline statistics query that is currently active.
    fn end_pipeline_statistics_query(&mut self);
}
672
/// Forwards both methods to the `RenderPass` methods of the same name.
impl WritePipelineStatistics for RenderPass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        // Called through the type path; presumably to make sure the wgpu
        // method, not this trait method, is invoked.
        RenderPass::begin_pipeline_statistics_query(self, query_set, index);
    }

    fn end_pipeline_statistics_query(&mut self) {
        RenderPass::end_pipeline_statistics_query(self);
    }
}
682
/// Forwards both methods to the `ComputePass` methods of the same name.
impl WritePipelineStatistics for ComputePass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        // Called through the type path; presumably to make sure the wgpu
        // method, not this trait method, is invoked.
        ComputePass::begin_pipeline_statistics_query(self, query_set, index);
    }

    fn end_pipeline_statistics_query(&mut self) {
        ComputePass::end_pipeline_statistics_query(self);
    }
}
692
/// A pass that supports both timestamps and pipeline statistics queries and
/// knows which kind of pass it is.
pub trait Pass: WritePipelineStatistics + WriteTimestamp {
    /// The kind of pass; decides which pipeline statistics counters are reported.
    const KIND: PassKind;
}
696
/// Render passes are reported as [`PassKind::Render`].
impl Pass for RenderPass<'_> {
    const KIND: PassKind = PassKind::Render;
}
700
/// Compute passes are reported as [`PassKind::Compute`].
impl Pass for ComputePass<'_> {
    const KIND: PassKind = PassKind::Compute;
}
704
/// The kind of pass a span was recorded for.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PassKind {
    /// A render pass; vertex, clipper and fragment counters are reported for it.
    Render,
    /// A compute pass; the compute shader invocation counter is reported for it.
    Compute,
}