benchmark

A module to benchmark PyTorch models along four axes: size, speed, compute and energy.
import warnings
warnings.filterwarnings('ignore')

Size


source

get_model_size

 get_model_size (model, temp_path='temp_model.pth')

source

get_num_parameters

 get_num_parameters (model)

Speed


source

evaluate_gpu_speed

 evaluate_gpu_speed (model, dummy_input, warmup_rounds=50,
                     test_rounds=100)

source

evaluate_cpu_speed

 evaluate_cpu_speed (model, dummy_input, warmup_rounds=50,
                     test_rounds=100)

Compute


source

get_model_macs

 get_model_macs (model, inputs)

source

evaluate_gpu_memory_usage

 evaluate_gpu_memory_usage (model, dummy_input, warmup_rounds=10,
                            test_rounds=100)

Energy


source

evaluate_emissions

 evaluate_emissions (model, dummy_input, warmup_rounds=50,
                     test_rounds=100)

source

benchmark

 benchmark (model, dummy_input)
from torchvision.models import resnet18

# Example: build a torchvision resnet18 (constructed with no arguments) and a
# random input tensor of shape (64, 3, 224, 224), then pass both to `benchmark`.
model = resnet18()
dummy_input = torch.randn(64, 3, 224, 224)
benchmark(model, dummy_input)
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[codecarbon INFO @ 13:19:30] offline tracker init
[codecarbon INFO @ 13:19:30] [setup] RAM Tracking...
[codecarbon INFO @ 13:19:30] [setup] GPU Tracking...
[codecarbon INFO @ 13:19:30] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:19:30] [setup] CPU Tracking...
[codecarbon WARNING @ 13:19:30] No CPU tracking mode found. Falling back on CPU constant mode.
[codecarbon WARNING @ 13:19:32] We saw that you have a 12th Gen Intel(R) Core(TM) i9-12900K but we don't know it. Please contact us.
[codecarbon INFO @ 13:19:32] CPU Model on constant consumption mode: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:19:32] >>> Tracker's metadata:
[codecarbon INFO @ 13:19:32]   Platform system: Linux-5.15.0-113-generic-x86_64-with-glibc2.31
[codecarbon INFO @ 13:19:32]   Python version: 3.9.0
[codecarbon INFO @ 13:19:32]   CodeCarbon version: 2.3.4
[codecarbon INFO @ 13:19:32]   Available RAM : 125.578 GB
[codecarbon INFO @ 13:19:32]   CPU count: 24
[codecarbon INFO @ 13:19:32]   CPU model: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:19:32]   GPU count: 1
[codecarbon INFO @ 13:19:32]   GPU model: 1 x NVIDIA GeForce RTX 3090
[codecarbon INFO @ 13:19:33] Energy consumed for RAM : 0.000016 kWh. RAM Power : 47.091885566711426 W
[codecarbon INFO @ 13:19:33] Energy consumed for all GPUs : 0.000109 kWh. Total GPU Power : 328.4733410957834 W
[codecarbon INFO @ 13:19:33] Energy consumed for all CPUs : 0.000014 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 13:19:33] 0.000139 kWh of electricity used since the beginning.
Model Size: 46.84 MB (disk), 11.690M parameters
GPU Latency: 13.110 ms (± 0.022 ms)
GPU Throughput: 4881.84 inferences/sec
CPU Latency: 475.591 ms (± 6.319 ms)
CPU Throughput: 134.57 inferences/sec
Model MACs: 116.738G
Average GPU Memory Usage: 94.18 MB
Peak GPU Memory Usage: 504.97 MB
Average Carbon Emissions per Inference: 0.000526 gCO2e
Average Energy Consumption per Inference: 0.001386 Wh
{'disk_size': 46835512,
 'num_parameters': '11.690M',
 'gpu_latency': 13.109815979003907,
 'gpu_throughput': 4881.838166340362,
 'cpu_latency': 475.5907801212743,
 'cpu_throughput': 134.56947164467778,
 'macs': '116.738G',
 'avg_gpu_memory': 94181376.0,
 'peak_gpu_memory': 504967168,
 'avg_emissions': 5.256446662000115e-07,
 'avg_energy': 1.385974440225733e-06}
@torch.inference_mode()
def evaluate(model, dataloader, device=None, verbose=True):
    """Compute the top-1 accuracy (in percent) of `model` on `dataloader.valid`.

    Args:
        model: Callable module applied to each batch of images. It is switched
            to eval mode and moved to `device` in place.
        dataloader: Object whose `valid` attribute yields `(images, labels)`
            batches.
        device: Device to run on. When None, CUDA is used if available,
            otherwise the CPU.
        verbose: When True, print the resulting accuracy.

    Returns:
        The accuracy as a float percentage, or 0.0 when the validation
        loader yields no samples.
    """
    if device is None:
        # Fall back to the CPU instead of failing on machines without a GPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)

    correct = 0
    total = 0
    # Gradient tracking is already disabled by the `inference_mode` decorator,
    # so no extra `torch.no_grad()` context is needed here.
    for images, labels in tqdm(dataloader.valid, desc="valid", leave=False):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        # Count every sample of the batch (the previous `- 1` dropped one
        # sample per batch and inflated the reported accuracy).
        total += labels.size(0)
        # Compare as plain tensors so tensor subclasses do not interfere with `==`.
        matches = predicted.as_subclass(torch.Tensor) == labels.as_subclass(torch.Tensor)
        correct += matches.sum().item()

    # Guard against an empty validation set rather than dividing by zero.
    acc = 100 * correct / total if total else 0.0
    if verbose:
        print(f"Valid Accuracy: {acc:.2f} %")
    return acc

source

compute_model_metrics

 compute_model_metrics (model, dls, dummy_input)

source

compare_models

 compare_models (model_list, dls)
from fastai.vision.all import *
# Fetch the PETS dataset via fastai's `untar_data` and list the image files
# found under its "images" sub-folder.
path = untar_data(URLs.PETS)
files = get_image_files(path/"images")

def label_func(f):
    """Return True when the first character of `f` is uppercase."""
    first_char = f[0]
    return first_char.isupper()

# Build dataloaders whose labels come from `label_func`, resizing each item to 64,
# then train a resnet18 learner for 5 one-cycle epochs after unfreezing it.
dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(64))
learn = vision_learner(dls, resnet18, metrics=accuracy)
learn.unfreeze()
learn.fit_one_cycle(5)
epoch train_loss valid_loss accuracy time
0 0.714393 0.421342 0.841678 00:03
1 0.396920 0.254652 0.888363 00:03
2 0.228655 0.230342 0.907307 00:03
3 0.139857 0.181267 0.933018 00:03
4 0.078330 0.166232 0.935724 00:03
# Keep a copy of the trained model before pruning so it can be compared later.
model = deepcopy(learn.model)
from fasterai.prune.all import *

# Continue training for 3 epochs with a PruneCallback configured for a
# sparsity of 25 on nn.Conv2d layers.
pr_cb = PruneCallback(sparsity=25, context='local', criteria=large_final, schedule=one_cycle, layer_type=[nn.Conv2d])
learn.fit_one_cycle(3, cbs=pr_cb)
Pruning until a sparsity of [25]%
epoch train_loss valid_loss accuracy time
0 0.050833 0.203945 0.932341 00:04
1 0.157106 0.257223 0.897835 00:04
2 0.189360 0.263265 0.894452 00:04
Sparsity at the end of epoch 0: [5.2]%
Sparsity at the end of epoch 1: [24.15]%
Sparsity at the end of epoch 2: [25.0]%
Final Sparsity: [25.0]%
# Keep a copy of the pruned model before quantizing.
pruned_model = deepcopy(learn.model)
from fasterai.quantize.all import *

qt = Quantizer()

# The learner's model is moved to the CPU before being handed to the quantizer
# together with the dataloaders.
q_model = qt.quantize(learn.model.to('cpu'), dls)
/home/HubensN/miniconda3/envs/clean/lib/python3.9/site-packages/torch/ao/quantization/observer.py:221: UserWarning: Please use quant_min and quant_max to specify the range for observers.                     reduce_range will be deprecated in a future release of PyTorch.
  warnings.warn(
# Compare the original, pruned and quantized models on the same dataloaders.
compare_models([model, pruned_model, q_model], dls)
Valid Accuracy: 95.12 %
Valid Accuracy: 90.92 %
Valid Accuracy: 89.61 %
+---------------------+----------------+-------------------+-------------------+
|                     | Original Model |      Pruned Model |   Quantized Model |
+---------------------+----------------+-------------------+-------------------+
| Latency (ms/sample) | 3239.0         | 2640.8    (1.23x) | 2151.0    (1.51x) |
| Accuracy (%)        | 95.12          | 90.92    (-4.20%) | 89.61    (-5.51%) |
| Params (M)          | 11.7           | 6.69      (1.75x) |                 * |
| Size (MiB)          | 46912066       | 26829378  (1.75x) | 6827042   (6.87x) |
| MACs (M)            | 149            | 86        (1.73x) |                 * |
+---------------------+----------------+-------------------+-------------------+