1use alloc::{borrow::Cow, sync::Arc};
2use core::{
3 ops::{DerefMut, Range},
4 sync::atomic::{AtomicBool, Ordering},
5};
6use std::thread::{self, ThreadId};
7
8use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};
9use bevy_ecs::resource::Resource;
10use bevy_ecs::system::{Res, ResMut};
11use bevy_platform::time::Instant;
12use std::sync::Mutex;
13use wgpu::{
14 Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode,
15 PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, RenderPass,
16};
17
18use crate::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};
19
20use super::RecordDiagnostics;
21
/// Capacity of the per-frame timestamp query set.
///
/// When timestamps are enabled, the pipeline-statistics region of the resolve buffer starts at
/// `MAX_TIMESTAMP_QUERIES * TIMESTAMP_SIZE` bytes, and that offset is handed to
/// `resolve_query_set` as the destination offset. wgpu requires that offset to be a multiple of
/// `QUERY_RESOLVE_BUFFER_ALIGNMENT` (256 bytes), so this value must stay a multiple of 32.
const MAX_TIMESTAMP_QUERIES: u32 = 256;
/// Capacity of the per-frame pipeline-statistics query set.
const MAX_PIPELINE_STATISTICS: u32 = 128;

/// Size in bytes of one resolved timestamp: a single 64-bit counter.
const TIMESTAMP_SIZE: u64 = core::mem::size_of::<u64>() as u64;
/// Size in bytes of one resolved pipeline-statistics query: five 64-bit counters, which is how
/// the readback code in this file indexes the data.
const PIPELINE_STATISTICS_SIZE: u64 = 5 * core::mem::size_of::<u64>() as u64;
28
/// State behind [`DiagnosticsRecorder`]: the frame currently being recorded plus the pool of
/// frames that are waiting for, or have completed, their GPU readback.
struct DiagnosticsRecorderInternal {
    /// Value of `queue.get_timestamp_period()` at construction; used to turn timestamp tick
    /// deltas into time when a frame's results are read back.
    timestamp_period_ns: f32,
    /// Device features captured at construction; used whenever a new `FrameData` is allocated.
    features: Features,
    /// Frame currently being recorded. Wrapped in a mutex so spans can be recorded through
    /// `&self`.
    current_frame: Mutex<FrameData>,
    /// Frames that were finished and are polled in `begin_frame` until their callback has run.
    submitted_frames: Vec<FrameData>,
    /// Frames whose callback has run; `finish_frame` reuses them instead of allocating.
    finished_frames: Vec<FrameData>,
    /// Tracy GPU context, cloned into every `FrameData` that gets created.
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
38
/// Records render diagnostics into [`QuerySet`]s, keeping track of which query indices belong
/// to which span, and reports the results through a callback once they have been read back.
#[derive(Resource)]
pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);
43
44impl DiagnosticsRecorder {
45 pub fn new(
47 adapter_info: &RenderAdapterInfo,
48 device: &RenderDevice,
49 queue: &RenderQueue,
50 ) -> DiagnosticsRecorder {
51 let features = device.features();
52
53 #[cfg(feature = "tracing-tracy")]
54 let tracy_gpu_context =
55 super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);
56 let _ = adapter_info; DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {
59 timestamp_period_ns: queue.get_timestamp_period(),
60 features,
61 current_frame: Mutex::new(FrameData::new(
62 device,
63 features,
64 #[cfg(feature = "tracing-tracy")]
65 tracy_gpu_context.clone(),
66 )),
67 submitted_frames: Vec::new(),
68 finished_frames: Vec::new(),
69 #[cfg(feature = "tracing-tracy")]
70 tracy_gpu_context,
71 }))
72 }
73
74 fn current_frame_mut(&mut self) -> &mut FrameData {
75 self.0.current_frame.get_mut().expect("lock poisoned")
76 }
77
78 fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {
79 self.0.current_frame.lock().expect("lock poisoned")
80 }
81
82 pub fn begin_frame(&mut self) {
84 let internal = &mut self.0;
85 let mut idx = 0;
86 while idx < internal.submitted_frames.len() {
87 let timestamp = internal.timestamp_period_ns;
88 if internal.submitted_frames[idx].run_mapped_callback(timestamp) {
89 let removed = internal.submitted_frames.swap_remove(idx);
90 internal.finished_frames.push(removed);
91 } else {
92 idx += 1;
93 }
94 }
95
96 self.current_frame_mut().begin();
97 }
98
99 pub fn resolve(&mut self, encoder: &mut CommandEncoder) {
103 self.current_frame_mut().resolve(encoder);
104 }
105
106 pub fn finish_frame(
113 &mut self,
114 device: &RenderDevice,
115 callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,
116 ) {
117 #[cfg(feature = "tracing-tracy")]
118 let tracy_gpu_context = self.0.tracy_gpu_context.clone();
119
120 let internal = &mut self.0;
121 internal
122 .current_frame
123 .get_mut()
124 .expect("lock poisoned")
125 .finish(callback);
126
127 let new_frame = match internal.finished_frames.pop() {
129 Some(frame) => frame,
130 None => FrameData::new(
131 device,
132 internal.features,
133 #[cfg(feature = "tracing-tracy")]
134 tracy_gpu_context,
135 ),
136 };
137
138 let old_frame = core::mem::replace(
139 internal.current_frame.get_mut().expect("lock poisoned"),
140 new_frame,
141 );
142 internal.submitted_frames.push(old_frame);
143 }
144}
145
146impl RecordDiagnostics for DiagnosticsRecorder {
147 fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {
148 self.current_frame_lock()
149 .begin_time_span(encoder, span_name);
150 }
151
152 fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
153 self.current_frame_lock().end_time_span(encoder);
154 }
155
156 fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {
157 self.current_frame_lock().begin_pass(pass, span_name);
158 }
159
160 fn end_pass_span<P: Pass>(&self, pass: &mut P) {
161 self.current_frame_lock().end_pass(pass);
162 }
163}
164
/// Bookkeeping for one diagnostic span (a timed region or a render/compute pass).
struct SpanRecord {
    /// Thread that opened the span; spans are matched to their parent and closed per thread.
    thread_id: ThreadId,
    /// Range into `FrameData::path_components` holding the span's full name path.
    path_range: Range<usize>,
    /// Kind of pass the span was opened for; `None` for plain time spans.
    pass_kind: Option<PassKind>,
    /// Timestamp query index written when the span began, if one could be written.
    begin_timestamp_index: Option<u32>,
    /// Timestamp query index written when the span ended, if one could be written.
    end_timestamp_index: Option<u32>,
    /// CPU time at which the span began.
    begin_instant: Option<Instant>,
    /// CPU time at which the span ended.
    end_instant: Option<Instant>,
    /// Pipeline-statistics query index started for the span, if any.
    pipeline_statistics_index: Option<u32>,
}
175
/// Query sets, readback buffers and span records for one frame of diagnostics.
struct FrameData {
    /// Timestamp query set; only created when `Features::TIMESTAMP_QUERY` is available.
    timestamps_query_set: Option<QuerySet>,
    /// Number of timestamp query slots handed out so far this frame.
    num_timestamps: u32,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_PASSES` is available.
    supports_timestamps_inside_passes: bool,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` is available.
    supports_timestamps_inside_encoders: bool,
    /// Pipeline-statistics query set; only created when
    /// `Features::PIPELINE_STATISTICS_QUERY` is available.
    pipeline_statistics_query_set: Option<QuerySet>,
    /// Number of pipeline-statistics query slots handed out so far this frame.
    num_pipeline_statistics: u32,
    /// Size in bytes of both `resolve_buffer` and `read_buffer`.
    buffer_size: u64,
    /// Byte offset of the pipeline-statistics region inside the buffers.
    pipeline_statistics_buffer_offset: u64,
    /// Destination of query resolves and source of the copy into `read_buffer`.
    resolve_buffer: Option<Buffer>,
    /// Mappable buffer the resolved data is copied into for reading on the CPU.
    read_buffer: Option<Buffer>,
    /// Flat storage of span names; each span's path is a contiguous range of this vector.
    path_components: Vec<Cow<'static, str>>,
    /// Spans that have begun but not yet ended.
    open_spans: Vec<SpanRecord>,
    /// Spans that have ended, in the order they were closed.
    closed_spans: Vec<SpanRecord>,
    /// Set by the `map_async` callback once `read_buffer` is mapped; cleared after unmapping.
    is_mapped: Arc<AtomicBool>,
    /// Callback stored by `finish` and consumed when the results are delivered.
    callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,
    /// Tracy GPU context used to upload GPU span timestamps.
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: tracy_client::GpuContext,
}
195
196impl FrameData {
197 fn new(
198 device: &RenderDevice,
199 features: Features,
200 #[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext,
201 ) -> FrameData {
202 let wgpu_device = device.wgpu_device();
203 let mut buffer_size = 0;
204
205 let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {
206 buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;
207 Some(wgpu_device.create_query_set(&QuerySetDescriptor {
208 label: Some("timestamps_query_set"),
209 ty: QueryType::Timestamp,
210 count: MAX_TIMESTAMP_QUERIES,
211 }))
212 } else {
213 None
214 };
215
216 let pipeline_statistics_buffer_offset = buffer_size;
217
218 let pipeline_statistics_query_set =
219 if features.contains(Features::PIPELINE_STATISTICS_QUERY) {
220 buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;
221 Some(wgpu_device.create_query_set(&QuerySetDescriptor {
222 label: Some("pipeline_statistics_query_set"),
223 ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),
224 count: MAX_PIPELINE_STATISTICS,
225 }))
226 } else {
227 None
228 };
229
230 let (resolve_buffer, read_buffer) = if buffer_size > 0 {
231 let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {
232 label: Some("render_statistics_resolve_buffer"),
233 size: buffer_size,
234 usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
235 mapped_at_creation: false,
236 });
237 let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {
238 label: Some("render_statistics_read_buffer"),
239 size: buffer_size,
240 usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
241 mapped_at_creation: false,
242 });
243 (Some(resolve_buffer), Some(read_buffer))
244 } else {
245 (None, None)
246 };
247
248 FrameData {
249 timestamps_query_set,
250 num_timestamps: 0,
251 supports_timestamps_inside_passes: features
252 .contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),
253 supports_timestamps_inside_encoders: features
254 .contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),
255 pipeline_statistics_query_set,
256 num_pipeline_statistics: 0,
257 buffer_size,
258 pipeline_statistics_buffer_offset,
259 resolve_buffer,
260 read_buffer,
261 path_components: Vec::new(),
262 open_spans: Vec::new(),
263 closed_spans: Vec::new(),
264 is_mapped: Arc::new(AtomicBool::new(false)),
265 callback: None,
266 #[cfg(feature = "tracing-tracy")]
267 tracy_gpu_context,
268 }
269 }
270
271 fn begin(&mut self) {
272 self.num_timestamps = 0;
273 self.num_pipeline_statistics = 0;
274 self.path_components.clear();
275 self.open_spans.clear();
276 self.closed_spans.clear();
277 }
278
279 fn write_timestamp(
280 &mut self,
281 encoder: &mut impl WriteTimestamp,
282 is_inside_pass: bool,
283 ) -> Option<u32> {
284 if !self.supports_timestamps_inside_encoders {
286 return None;
287 }
288
289 if is_inside_pass && !self.supports_timestamps_inside_passes {
290 return None;
291 }
292
293 if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {
294 return None;
295 }
296
297 let set = self.timestamps_query_set.as_ref()?;
298 let index = self.num_timestamps;
299 encoder.write_timestamp(set, index);
300 self.num_timestamps += 1;
301 Some(index)
302 }
303
304 fn write_pipeline_statistics(
305 &mut self,
306 encoder: &mut impl WritePipelineStatistics,
307 ) -> Option<u32> {
308 if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {
309 return None;
310 }
311
312 let set = self.pipeline_statistics_query_set.as_ref()?;
313 let index = self.num_pipeline_statistics;
314 encoder.begin_pipeline_statistics_query(set, index);
315 self.num_pipeline_statistics += 1;
316 Some(index)
317 }
318
319 fn open_span(
320 &mut self,
321 pass_kind: Option<PassKind>,
322 name: Cow<'static, str>,
323 ) -> &mut SpanRecord {
324 let thread_id = thread::current().id();
325
326 let parent = self.open_spans.iter().rfind(|v| v.thread_id == thread_id);
327
328 let path_range = match &parent {
329 Some(parent) if parent.path_range.end == self.path_components.len() => {
330 parent.path_range.start..parent.path_range.end + 1
331 }
332 Some(parent) => {
333 self.path_components
334 .extend_from_within(parent.path_range.clone());
335 self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1
336 }
337 None => self.path_components.len()..self.path_components.len() + 1,
338 };
339
340 self.path_components.push(name);
341
342 self.open_spans.push(SpanRecord {
343 thread_id,
344 path_range,
345 pass_kind,
346 begin_timestamp_index: None,
347 end_timestamp_index: None,
348 begin_instant: None,
349 end_instant: None,
350 pipeline_statistics_index: None,
351 });
352
353 self.open_spans.last_mut().unwrap()
354 }
355
356 fn close_span(&mut self) -> &mut SpanRecord {
357 let thread_id = thread::current().id();
358
359 let iter = self.open_spans.iter();
360 let (index, _) = iter
361 .enumerate()
362 .rfind(|(_, v)| v.thread_id == thread_id)
363 .unwrap();
364
365 let span = self.open_spans.swap_remove(index);
366 self.closed_spans.push(span);
367 self.closed_spans.last_mut().unwrap()
368 }
369
370 fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {
371 let begin_instant = Instant::now();
372 let begin_timestamp_index = self.write_timestamp(encoder, false);
373
374 let span = self.open_span(None, name);
375 span.begin_instant = Some(begin_instant);
376 span.begin_timestamp_index = begin_timestamp_index;
377 }
378
379 fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {
380 let end_timestamp_index = self.write_timestamp(encoder, false);
381
382 let span = self.close_span();
383 span.end_timestamp_index = end_timestamp_index;
384 span.end_instant = Some(Instant::now());
385 }
386
387 fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {
388 let begin_instant = Instant::now();
389
390 let begin_timestamp_index = self.write_timestamp(pass, true);
391 let pipeline_statistics_index = self.write_pipeline_statistics(pass);
392
393 let span = self.open_span(Some(P::KIND), name);
394 span.begin_instant = Some(begin_instant);
395 span.begin_timestamp_index = begin_timestamp_index;
396 span.pipeline_statistics_index = pipeline_statistics_index;
397 }
398
399 fn end_pass(&mut self, pass: &mut impl Pass) {
400 let end_timestamp_index = self.write_timestamp(pass, true);
401
402 let span = self.close_span();
403 span.end_timestamp_index = end_timestamp_index;
404
405 if span.pipeline_statistics_index.is_some() {
406 pass.end_pipeline_statistics_query();
407 }
408
409 span.end_instant = Some(Instant::now());
410 }
411
412 fn resolve(&mut self, encoder: &mut CommandEncoder) {
413 let Some(resolve_buffer) = &self.resolve_buffer else {
414 return;
415 };
416
417 match &self.timestamps_query_set {
418 Some(set) if self.num_timestamps > 0 => {
419 encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);
420 }
421 _ => {}
422 }
423
424 match &self.pipeline_statistics_query_set {
425 Some(set) if self.num_pipeline_statistics > 0 => {
426 encoder.resolve_query_set(
427 set,
428 0..self.num_pipeline_statistics,
429 resolve_buffer,
430 self.pipeline_statistics_buffer_offset,
431 );
432 }
433 _ => {}
434 }
435
436 let Some(read_buffer) = &self.read_buffer else {
437 return;
438 };
439
440 encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);
441 }
442
443 fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {
444 DiagnosticPath::from_components(
445 core::iter::once("render")
446 .chain(self.path_components[range.clone()].iter().map(|v| &**v))
447 .chain(core::iter::once(field)),
448 )
449 }
450
451 fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {
452 let Some(read_buffer) = &self.read_buffer else {
453 let mut diagnostics = Vec::new();
456
457 for span in &self.closed_spans {
458 if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
459 diagnostics.push(RenderDiagnostic {
460 path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
461 suffix: "ms",
462 value: (end - begin).as_secs_f64() * 1000.0,
463 });
464 }
465 }
466
467 callback(RenderDiagnostics(diagnostics));
468 return;
469 };
470
471 self.callback = Some(Box::new(callback));
472
473 let is_mapped = self.is_mapped.clone();
474 read_buffer.slice(..).map_async(MapMode::Read, move |res| {
475 if let Err(e) = res {
476 tracing::warn!("Failed to download render statistics buffer: {e}");
477 return;
478 }
479
480 is_mapped.store(true, Ordering::Release);
481 });
482 }
483
484 fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {
486 let Some(read_buffer) = &self.read_buffer else {
487 return true;
488 };
489 if !self.is_mapped.load(Ordering::Acquire) {
490 return false;
492 }
493 let Some(callback) = self.callback.take() else {
494 return true;
495 };
496
497 let data = read_buffer.slice(..).get_mapped_range();
498
499 let timestamps = data[..(self.num_timestamps * 8) as usize]
500 .chunks(8)
501 .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
502 .collect::<Vec<u64>>();
503
504 let start = self.pipeline_statistics_buffer_offset as usize;
505 let len = (self.num_pipeline_statistics as usize) * 40;
506 let pipeline_statistics = data[start..start + len]
507 .chunks(8)
508 .map(|v| u64::from_le_bytes(v.try_into().unwrap()))
509 .collect::<Vec<u64>>();
510
511 let mut diagnostics = Vec::new();
512
513 for span in &self.closed_spans {
514 if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
515 diagnostics.push(RenderDiagnostic {
516 path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
517 suffix: "ms",
518 value: (end - begin).as_secs_f64() * 1000.0,
519 });
520 }
521
522 if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)
523 {
524 let begin = timestamps[begin as usize] as f64;
525 let end = timestamps[end as usize] as f64;
526 let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;
527
528 #[cfg(feature = "tracing-tracy")]
529 {
530 let name = &self.path_components[span.path_range.clone()].join("/");
534 let mut tracy_gpu_span =
535 self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();
536 tracy_gpu_span.end_zone();
537 tracy_gpu_span.upload_timestamp_start(begin as i64);
538 tracy_gpu_span.upload_timestamp_end(end as i64);
539 }
540
541 diagnostics.push(RenderDiagnostic {
542 path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),
543 suffix: "ms",
544 value,
545 });
546 }
547
548 if let Some(index) = span.pipeline_statistics_index {
549 let index = (index as usize) * 5;
550
551 if span.pass_kind == Some(PassKind::Render) {
552 diagnostics.push(RenderDiagnostic {
553 path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),
554 suffix: "",
555 value: pipeline_statistics[index] as f64,
556 });
557
558 diagnostics.push(RenderDiagnostic {
559 path: self.diagnostic_path(&span.path_range, "clipper_invocations"),
560 suffix: "",
561 value: pipeline_statistics[index + 1] as f64,
562 });
563
564 diagnostics.push(RenderDiagnostic {
565 path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),
566 suffix: "",
567 value: pipeline_statistics[index + 2] as f64,
568 });
569
570 diagnostics.push(RenderDiagnostic {
571 path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),
572 suffix: "",
573 value: pipeline_statistics[index + 3] as f64,
574 });
575 }
576
577 if span.pass_kind == Some(PassKind::Compute) {
578 diagnostics.push(RenderDiagnostic {
579 path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),
580 suffix: "",
581 value: pipeline_statistics[index + 4] as f64,
582 });
583 }
584 }
585 }
586
587 callback(RenderDiagnostics(diagnostics));
588
589 drop(data);
590 read_buffer.unmap();
591 self.is_mapped.store(false, Ordering::Release);
592
593 true
594 }
595}
596
/// The set of render diagnostics produced for one frame.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnostics(Vec<RenderDiagnostic>);
600
/// A single recorded render diagnostic that has not yet been stored in a
/// [`DiagnosticsStore`].
#[derive(Debug, Clone, Resource)]
pub struct RenderDiagnostic {
    /// Path under which the measurement is stored.
    pub path: DiagnosticPath,
    /// Unit suffix used when the diagnostic is first registered (e.g. `"ms"`).
    pub suffix: &'static str,
    /// The measured value.
    pub value: f64,
}
608
/// Shared slot holding render diagnostics until [`sync_diagnostics`] takes them out and
/// stores them in the [`DiagnosticsStore`].
// NOTE(review): the code that fills this slot is not in this file; presumably it is the
// callback passed to `DiagnosticsRecorder::finish_frame` - confirm.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);
616
617pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {
619 let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else {
620 return;
621 };
622
623 let time = Instant::now();
624
625 for diagnostic in &diagnostics.0 {
626 if store.get(&diagnostic.path).is_none() {
627 store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));
628 }
629
630 store
631 .get_mut(&diagnostic.path)
632 .unwrap()
633 .add_measurement(DiagnosticMeasurement {
634 time,
635 value: diagnostic.value,
636 });
637 }
638}
639
/// Something a GPU timestamp query can be recorded through (a command encoder or a pass).
pub trait WriteTimestamp {
    /// Records a timestamp into slot `index` of `query_set`.
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);
}
643
644impl WriteTimestamp for CommandEncoder {
645 fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
646 if cfg!(target_os = "macos") {
647 return;
652 }
653 CommandEncoder::write_timestamp(self, query_set, index);
654 }
655}
656
impl WriteTimestamp for RenderPass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        // The type-qualified path resolves to wgpu's inherent method, not to this trait method.
        RenderPass::write_timestamp(self, query_set, index);
    }
}
662
impl WriteTimestamp for ComputePass<'_> {
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        // The type-qualified path resolves to wgpu's inherent method, not to this trait method.
        ComputePass::write_timestamp(self, query_set, index);
    }
}
668
/// Something a pipeline-statistics query can be recorded through (a render or compute pass).
pub trait WritePipelineStatistics {
    /// Starts a pipeline-statistics query that will be stored in slot `index` of `query_set`.
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);

    /// Ends the pipeline-statistics query that is currently active.
    fn end_pipeline_statistics_query(&mut self);
}
674
// Both methods forward to wgpu's inherent `RenderPass` methods of the same name.
impl WritePipelineStatistics for RenderPass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        RenderPass::begin_pipeline_statistics_query(self, query_set, index);
    }

    fn end_pipeline_statistics_query(&mut self) {
        RenderPass::end_pipeline_statistics_query(self);
    }
}
684
// Both methods forward to wgpu's inherent `ComputePass` methods of the same name.
impl WritePipelineStatistics for ComputePass<'_> {
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        ComputePass::begin_pipeline_statistics_query(self, query_set, index);
    }

    fn end_pipeline_statistics_query(&mut self) {
        ComputePass::end_pipeline_statistics_query(self);
    }
}
694
/// A GPU pass that supports both timestamp and pipeline-statistics queries.
pub trait Pass: WritePipelineStatistics + WriteTimestamp {
    /// Which kind of pass this is; decides which pipeline statistics get reported for it.
    const KIND: PassKind;
}
698
// Render passes report the vertex, clipper and fragment statistics.
impl Pass for RenderPass<'_> {
    const KIND: PassKind = PassKind::Render;
}
702
// Compute passes report the compute shader invocation count.
impl Pass for ComputePass<'_> {
    const KIND: PassKind = PassKind::Compute;
}
706
/// The kind of GPU pass a span was recorded for.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PassKind {
    /// A render pass.
    Render,
    /// A compute pass.
    Compute,
}