import torch, torch.nn as nn
from fasterbench import benchmark, Report, ComparisonReport
Benchmark Reports
Generate professional before/after compression reports with fasterbench
Introduction
After compressing a model (pruning, quantization, sparsification), you want to know: how much did it actually improve? fasterbench’s ComparisonReport answers this by benchmarking both versions and generating a professional report with metric deltas, improvement rankings, and optional radar charts.
This tutorial shows the full workflow: benchmark → compress → benchmark again → generate report.
Setup
Step 1: Benchmark the Original Model
from torchvision.models import ResNet18_Weights, resnet18

# `pretrained=True` is deprecated since torchvision 0.13; pass the weight enum
# explicitly. IMAGENET1K_V1 is the checkpoint `pretrained=True` used to load,
# so the benchmarked model is unchanged and no deprecation warnings are emitted.
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Single 224x224 RGB image used as the example input for benchmarking.
x = torch.randn(1, 3, 224, 224)

# Baseline measurements, kept for the before/after comparison later on.
result_before = benchmark(model, x, metrics=["size", "speed", "compute"])
result_before.summary()
═══ Size ════════════════════════════════════
Disk: 44.67 MiB
Params: 11.69M
═══ Speed ═══════════════════════════════════
cpu: 27.41 ms │ 36.5 inf/s │ p99: 28.39 ms
cuda: 0.65 ms │ 1527.9 inf/s │ p99: 0.70 ms
═══ Compute ═════════════════════════════════
MACs: 1824.0 M
Step 2: Compress the Model
Use any compression technique — pruning, quantization, sparsification:
# Example: structured pruning with fasterai
# NOTE(review): in the recorded run below, constructing the Pruner raises
# "RuntimeError: Inference tensors cannot be saved for backward" while
# torch-pruning traces the model to build its dependency graph. Presumably the
# earlier benchmark() call left the model's tensors in inference mode — TODO
# confirm the cause and, if so, prune a freshly created copy of the model.
from fasterai.prune.pruner import Pruner
from fasterai.core.criteria import large_final
pruner = Pruner(model, pruning_ratio=0.5, context='local', criteria=large_final,
example_inputs=x)
pruner.prune_model()
Ignoring output layer: fc
Total ignored layers: 1
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch_pruning/dependency.py:796, in DependencyGraph._trace(self, model, example_inputs, forward_fn, output_transform) 795 try: --> 796 out = model(*example_inputs) 797 except: File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs) 1774 else: -> 1775 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs) 1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1784 or _global_backward_pre_hooks or _global_backward_hooks 1785 or _global_forward_hooks or _global_forward_pre_hooks): -> 1786 return forward_call(*args, **kwargs) 1788 result = None File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torchvision/models/resnet.py:285, in ResNet.forward(self, x) 284 def forward(self, x: Tensor) -> Tensor: --> 285 return self._forward_impl(x) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torchvision/models/resnet.py:268, in ResNet._forward_impl(self, x) 266 def _forward_impl(self, x: Tensor) -> Tensor: 267 # See note [TorchScript super()] --> 268 x = self.conv1(x) 269 x = self.bn1(x) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs) 1774 else: -> 1775 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1881, in Module._call_impl(self, *args, **kwargs) 1880 try: -> 1881 return inner() 1882 except Exception: 1883 # run always called hooks if they have not already been run 1884 # For now only forward hooks have the always_call option but perhaps 1885 # this functionality 
should be added to full backward hooks as well. File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1829, in Module._call_impl.<locals>.inner() 1827 args = bw_hook.setup_input_hook(args) -> 1829 result = forward_call(*args, **kwargs) 1830 if _global_forward_hooks or self._forward_hooks: File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/conv.py:548, in Conv2d.forward(self, input) 547 def forward(self, input: Tensor) -> Tensor: --> 548 return self._conv_forward(input, self.weight, self.bias) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/conv.py:543, in Conv2d._conv_forward(self, input, weight, bias) 532 return F.conv2d( 533 F.pad( 534 input, self._reversed_padding_repeated_twice, mode=self.padding_mode (...) 541 self.groups, 542 ) --> 543 return F.conv2d( 544 input, weight, bias, self.stride, self.padding, self.dilation, self.groups 545 ) RuntimeError: Inference tensors cannot be saved for backward. Please do not use Tensors created in inference mode in computation tracked by autograd. To work around this, you can make a clone to get a normal tensor and use it in autograd, or use `torch.no_grad()` instead of `torch.inference_mode()`. 
During handling of the above exception, another exception occurred: RuntimeError Traceback (most recent call last) Cell In[3], line 5 2 from fasterai.prune.pruner import Pruner 3 from fasterai.core.criteria import large_final ----> 5 pruner = Pruner(model, pruning_ratio=0.5, context='local', criteria=large_final, 6 example_inputs=x) 7 pruner.prune_model() File ~/Developer/FasterAI-Labs/gh/fasterai/fasterai/prune/pruner.py:53, in Pruner.__init__(self, model, pruning_ratio, context, criteria, schedule, ignored_layers, example_inputs, *args, **kwargs) 50 # Clone example_inputs to escape inference mode (torch-pruning needs autograd for tracing) 51 _example_inputs = self.example_inputs.clone().to(next(self.model.parameters()).device) ---> 53 self.pruner = tp.pruner.MetaPruner( 54 self.model, 55 example_inputs=_example_inputs, 56 importance=self.group_importance, 57 pruning_ratio=self.default_pruning_ratio, 58 pruning_ratio_dict=self.pruning_ratio_dict, 59 ignored_layers=self.ignored_layers, 60 global_pruning=True if self.context=='global' else False, 61 num_heads=self.num_heads, 62 iterative_pruning_ratio_scheduler=tp_schedule, 63 *args, 64 **kwargs 65 ) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch_pruning/pruner/algorithms/base_pruner.py:133, in BasePruner.__init__(self, model, example_inputs, importance, global_pruning, pruning_ratio, pruning_ratio_dict, max_pruning_ratio, iterative_steps, iterative_pruning_ratio_scheduler, ignored_layers, round_to, isomorphic, in_channel_groups, out_channel_groups, num_heads, prune_num_heads, prune_head_dims, head_pruning_ratio, head_pruning_ratio_dict, customized_pruners, unwrapped_parameters, root_module_types, forward_fn, output_transform, channel_groups, ch_sparsity, ch_sparsity_dict) 129 self.ignored_params.append(layer) 131 ############################################### 132 # Build dependency graph --> 133 self.DG = dependency.DependencyGraph().build_dependency( 134 model, 135 example_inputs=example_inputs, 
136 forward_fn=forward_fn, 137 output_transform=output_transform, 138 unwrapped_parameters=unwrapped_parameters, 139 customized_pruners=customized_pruners, 140 ignored_params=self.ignored_params, 141 ) 143 ############################################### 144 # Iterative pruning 145 # The pruner will prune the model iteratively for several steps to achieve the target pruning ratio 146 # E.g., if iterative_steps=5, pruning_ratio=0.5, the pruning ratio of each step will be [0.1, 0.2, 0.3, 0.4, 0.5] 147 self.iterative_steps = iterative_steps File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch_pruning/dependency.py:388, in DependencyGraph.build_dependency(self, model, example_inputs, forward_fn, output_transform, unwrapped_parameters, customized_pruners, ignored_layers, ignored_params, verbose) 385 assert torch.is_grad_enabled(), "Dependency graph relies on autograd for tracing. Please check and disable the torch.no_grad() in your code." 387 # Build computational graph through tracing. 
--> 388 self.module2node = self._trace( 389 model, example_inputs, forward_fn, output_transform=output_transform 390 ) 392 # Build dependency graph 393 self._build_dependency(self.module2node) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch_pruning/dependency.py:798, in DependencyGraph._trace(self, model, example_inputs, forward_fn, output_transform) 796 out = model(*example_inputs) 797 except: --> 798 out = model(example_inputs) 799 for hook in hooks: 800 hook.remove() File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs) 1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1774 else: -> 1775 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1786, in Module._call_impl(self, *args, **kwargs) 1781 # If we don't have any hooks, we want to skip the rest of the logic in 1782 # this function, and just call forward. 
1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1784 or _global_backward_pre_hooks or _global_backward_hooks 1785 or _global_forward_hooks or _global_forward_pre_hooks): -> 1786 return forward_call(*args, **kwargs) 1788 result = None 1789 called_always_called_hooks = set() File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torchvision/models/resnet.py:285, in ResNet.forward(self, x) 284 def forward(self, x: Tensor) -> Tensor: --> 285 return self._forward_impl(x) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torchvision/models/resnet.py:268, in ResNet._forward_impl(self, x) 266 def _forward_impl(self, x: Tensor) -> Tensor: 267 # See note [TorchScript super()] --> 268 x = self.conv1(x) 269 x = self.bn1(x) 270 x = self.relu(x) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs) 1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1774 else: -> 1775 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1881, in Module._call_impl(self, *args, **kwargs) 1878 return inner() 1880 try: -> 1881 return inner() 1882 except Exception: 1883 # run always called hooks if they have not already been run 1884 # For now only forward hooks have the always_call option but perhaps 1885 # this functionality should be added to full backward hooks as well. 
1886 for hook_id, hook in _global_forward_hooks.items(): File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/module.py:1829, in Module._call_impl.<locals>.inner() 1826 bw_hook = BackwardHook(self, full_backward_hooks, backward_pre_hooks) 1827 args = bw_hook.setup_input_hook(args) -> 1829 result = forward_call(*args, **kwargs) 1830 if _global_forward_hooks or self._forward_hooks: 1831 for hook_id, hook in ( 1832 *_global_forward_hooks.items(), 1833 *self._forward_hooks.items(), 1834 ): 1835 # mark that always called hook is run File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/conv.py:548, in Conv2d.forward(self, input) 547 def forward(self, input: Tensor) -> Tensor: --> 548 return self._conv_forward(input, self.weight, self.bias) File ~/miniconda3/envs/dev/lib/python3.12/site-packages/torch/nn/modules/conv.py:543, in Conv2d._conv_forward(self, input, weight, bias) 531 if self.padding_mode != "zeros": 532 return F.conv2d( 533 F.pad( 534 input, self._reversed_padding_repeated_twice, mode=self.padding_mode (...) 541 self.groups, 542 ) --> 543 return F.conv2d( 544 input, weight, bias, self.stride, self.padding, self.dilation, self.groups 545 ) RuntimeError: Inference tensors cannot be saved for backward. Please do not use Tensors created in inference mode in computation tracked by autograd. To work around this, you can make a clone to get a normal tensor and use it in autograd, or use `torch.no_grad()` instead of `torch.inference_mode()`.
Step 3: Benchmark the Compressed Model
# Same input and metrics as the Step 1 benchmark call, so both results cover
# the same measurements.
# NOTE(review): if the pruning cell raised (as in the recorded traceback),
# prune_model() never ran and `model` is still the unpruned network here.
result_after = benchmark(model, x, metrics=["size", "speed", "compute"])
result_after.summary()
Step 4: Generate Comparison Report
# Compare the Step 1 (before) and Step 3 (after) benchmark results.
report = ComparisonReport(
    result_before,
    result_after,
    before_name="ResNet-18 (original)",
    after_name="ResNet-18 (50% pruned)",
    title="Pruning Compression Report",
)
report.summary()
Export to HTML or Markdown
Generate shareable reports:
# HTML report with embedded radar chart
report.to_html("compression_report.html", include_charts=True)
# Markdown report (great for GitHub PRs)
# NOTE(review): the result is sliced as md[:200] right after this call, so
# to_markdown() presumably returns the Markdown text as a str in addition to
# writing the file — confirm against the fasterbench API.
md = report.to_markdown("compression_report.md")
print(md[:200])
Single Model Report
The `Report` class generates a standalone report for a single model:
# Standalone report built from the baseline benchmark result alone.
single = Report(
    result_before,
    model_name="ResNet-18",
    description="Baseline ImageNet classifier",
)
single.summary()
# Also supports HTML and Markdown export
single.to_html("resnet18_report.html")
Programmatic Access
Access report data as dictionaries for further processing:
# Walk every metric delta and label it as improved or regressed.
# (No `if d.improved` filter: it would hide regressions and make the
# "regressed" label unreachable.)
for d in report.deltas:
    status = 'improved' if d.improved else 'regressed'
    print(f"{d.label}: {d.delta_pct:+.1f}% ({status})")

# Top 3 improvements
for d in report.top_improvements(3):
    print(f" {d.label}: {d.before:.0f} → {d.after:.0f}")
# Serialize to dict (for JSON, databases, etc.)
data = report.as_dict()
Summary
| Tool / Function | Purpose |
|---|---|
| `Report(result)` | Single model report |
| `ComparisonReport(before, after)` | Before/after comparison |
| `.summary()` | Console output |
| `.to_html(path)` | HTML with radar charts |
| `.to_markdown(path)` | Markdown (great for PRs) |
| `.deltas` | List of `ReportMetricDelta` |
| `.top_improvements(n)` | Top N improvements sorted by impact |
See Also
- Benchmark — Unified benchmarking API
- Visualization — Radar plots for visual comparison
- Report API — Full API reference