import warnings
warnings.filterwarnings('ignore')
benchmark
A module to benchmark a PyTorch model according to its size, speed, compute, and energy consumption.
Size
get_model_size
get_model_size (model, temp_path='temp_model.pth')
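The `temp_path` argument suggests the disk size is obtained by serializing the model and reading the file size back. A minimal sketch of that idea (illustrative only, not the library's actual code; the `_sketch` helper name is hypothetical):

import os
import torch

def get_model_size_sketch(model, temp_path='temp_model.pth'):
    # Serialize the weights to a temporary file and measure its size on disk.
    torch.save(model.state_dict(), temp_path)
    size_bytes = os.path.getsize(temp_path)
    os.remove(temp_path)
    return size_bytes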
get_num_parameters
get_num_parameters (model)
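Parameter counting presumably reduces to summing `numel()` over `model.parameters()`; an illustrative sketch:

def get_num_parameters_sketch(model):
    # Total number of elements across all parameter tensors.
    return sum(p.numel() for p in model.parameters())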
Speed
evaluate_gpu_speed
evaluate_gpu_speed (model, dummy_input, warmup_rounds=50, test_rounds=100)
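GPU latency is typically measured with CUDA events after a warm-up phase, synchronizing before reading the elapsed time. A hedged sketch of that protocol (the function name is hypothetical and the real implementation may differ, e.g. it also reports throughput):

import torch

@torch.inference_mode()
def gpu_latency_ms_sketch(model, dummy_input, warmup_rounds=50, test_rounds=100):
    model, dummy_input = model.cuda(), dummy_input.cuda()
    for _ in range(warmup_rounds):                  # warm up kernels and caches
        model(dummy_input)
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    for _ in range(test_rounds):
        model(dummy_input)
    end.record()
    torch.cuda.synchronize()                        # wait for all kernels to finish
    return start.elapsed_time(end) / test_rounds    # average ms per forward pass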
evaluate_cpu_speed
evaluate_cpu_speed (model, dummy_input, warmup_rounds=50, test_rounds=100)
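The CPU variant presumably follows the same warm-up/measure protocol with a wall-clock timer instead of CUDA events; an analogous sketch (names hypothetical):

import time
import torch

@torch.inference_mode()
def cpu_latency_ms_sketch(model, dummy_input, warmup_rounds=50, test_rounds=100):
    model, dummy_input = model.cpu(), dummy_input.cpu()
    for _ in range(warmup_rounds):
        model(dummy_input)
    start = time.perf_counter()
    for _ in range(test_rounds):
        model(dummy_input)
    elapsed = time.perf_counter() - start
    return elapsed * 1000 / test_rounds             # average ms per forward pass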
Compute
get_model_macs
get_model_macs (model, inputs)
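The `Register count_convNd() ...` messages in the benchmark output below come from `thop`, so MAC counting presumably wraps `thop.profile`; an illustrative sketch:

from thop import profile

def get_model_macs_sketch(model, inputs):
    # thop.profile returns (macs, params); only the MAC count is of interest here.
    macs, _ = profile(model, inputs=(inputs,))
    return macs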
evaluate_gpu_memory_usage
evaluate_gpu_memory_usage (model, dummy_input, warmup_rounds=10, test_rounds=100)
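GPU memory can be probed with PyTorch's CUDA memory statistics. A sketch under the assumption that average and peak allocated bytes are what gets reported (hypothetical helper name):

import torch

@torch.inference_mode()
def gpu_memory_sketch(model, dummy_input, warmup_rounds=10, test_rounds=100):
    model, dummy_input = model.cuda(), dummy_input.cuda()
    for _ in range(warmup_rounds):
        model(dummy_input)
    torch.cuda.reset_peak_memory_stats()
    readings = []
    for _ in range(test_rounds):
        model(dummy_input)
        readings.append(torch.cuda.memory_allocated())   # bytes currently allocated
    avg_memory = sum(readings) / len(readings)
    peak_memory = torch.cuda.max_memory_allocated()      # high-water mark in bytes
    return avg_memory, peak_memory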
Energy
evaluate_emissions
evaluate_emissions (model, dummy_input, warmup_rounds=50, test_rounds=100)
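The codecarbon log lines in the benchmark output below ("offline tracker init") indicate that an `OfflineEmissionsTracker` wraps the timed loop. A hedged sketch reporting average emissions per inference (the helper name and the country code are assumptions; energy is tracked by codecarbon in the same run):

import torch
from codecarbon import OfflineEmissionsTracker

@torch.inference_mode()
def emissions_per_inference_sketch(model, dummy_input, warmup_rounds=50, test_rounds=100):
    for _ in range(warmup_rounds):                  # warm up outside the tracked window
        model(dummy_input)
    tracker = OfflineEmissionsTracker(country_iso_code="FRA", log_level="error")
    tracker.start()
    for _ in range(test_rounds):
        model(dummy_input)
    total_kg_co2e = tracker.stop()                  # stop() returns total emissions in kg CO2e
    return total_kg_co2e / test_rounds              # average kg CO2e per forward pass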
benchmark
benchmark (model, dummy_input)
import torch
from torchvision.models import resnet18

model = resnet18()
dummy_input = torch.randn(64, 3, 224, 224)
benchmark(model, dummy_input)
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[codecarbon INFO @ 13:19:30] offline tracker init
[codecarbon INFO @ 13:19:30] [setup] RAM Tracking...
[codecarbon INFO @ 13:19:30] [setup] GPU Tracking...
[codecarbon INFO @ 13:19:30] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:19:30] [setup] CPU Tracking...
[codecarbon WARNING @ 13:19:30] No CPU tracking mode found. Falling back on CPU constant mode.
[codecarbon WARNING @ 13:19:32] We saw that you have a 12th Gen Intel(R) Core(TM) i9-12900K but we don't know it. Please contact us.
[codecarbon INFO @ 13:19:32] CPU Model on constant consumption mode: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:19:32] >>> Tracker's metadata:
[codecarbon INFO @ 13:19:32] Platform system: Linux-5.15.0-113-generic-x86_64-with-glibc2.31
[codecarbon INFO @ 13:19:32] Python version: 3.9.0
[codecarbon INFO @ 13:19:32] CodeCarbon version: 2.3.4
[codecarbon INFO @ 13:19:32] Available RAM : 125.578 GB
[codecarbon INFO @ 13:19:32] CPU count: 24
[codecarbon INFO @ 13:19:32] CPU model: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:19:32] GPU count: 1
[codecarbon INFO @ 13:19:32] GPU model: 1 x NVIDIA GeForce RTX 3090
[codecarbon INFO @ 13:19:33] Energy consumed for RAM : 0.000016 kWh. RAM Power : 47.091885566711426 W
[codecarbon INFO @ 13:19:33] Energy consumed for all GPUs : 0.000109 kWh. Total GPU Power : 328.4733410957834 W
[codecarbon INFO @ 13:19:33] Energy consumed for all CPUs : 0.000014 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 13:19:33] 0.000139 kWh of electricity used since the beginning.
Model Size: 46.84 MB (disk), 11.690M parameters
GPU Latency: 13.110 ms (± 0.022 ms)
GPU Throughput: 4881.84 inferences/sec
CPU Latency: 475.591 ms (± 6.319 ms)
CPU Throughput: 134.57 inferences/sec
Model MACs: 116.738G
Average GPU Memory Usage: 94.18 MB
Peak GPU Memory Usage: 504.97 MB
Average Carbon Emissions per Inference: 0.000526 gCO2e
Average Energy Consumption per Inference: 0.001386 Wh
{'disk_size': 46835512,
'num_parameters': '11.690M',
'gpu_latency': 13.109815979003907,
'gpu_throughput': 4881.838166340362,
'cpu_latency': 475.5907801212743,
'cpu_throughput': 134.56947164467778,
'macs': '116.738G',
'avg_gpu_memory': 94181376.0,
'peak_gpu_memory': 504967168,
'avg_emissions': 5.256446662000115e-07,
'avg_energy': 1.385974440225733e-06}
import torch
from tqdm import tqdm

@torch.inference_mode()
def evaluate(model, dataloader, device=None, verbose=True):
    # Top-1 accuracy of `model` over the validation split of `dataloader`.
    if device is None: device = torch.device("cuda")
    model.eval()
    model.to(device)
    with torch.no_grad():
        correct = 0
        total = 0
        loader = tqdm(dataloader.valid, desc="valid", leave=False)
        for i, data in enumerate(loader):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted.as_subclass(torch.Tensor) == labels.as_subclass(torch.Tensor)).sum().item()
        acc = 100 * correct / total
        if verbose:
            print(f"Valid Accuracy: {acc:.2f} %")
    return acc
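For example, with a fastai DataLoaders like the `dls` built in the Pets example below, a quick check might look like this (assuming `model` is a trained module):

acc = evaluate(model, dls, device=torch.device("cuda"))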
compute_model_metrics
compute_model_metrics (model, dls, dummy_input)
compare_models
compare_models (model_list, dls)
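Going by the signature, a single-model report would presumably be obtained with a call along these lines, using the 64x64 inputs of the Pets dataloaders built below (the dummy input shape is an assumption):

metrics = compute_model_metrics(model, dls, dummy_input=torch.randn(1, 3, 64, 64))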
from fastai.vision.all import *

path = untar_data(URLs.PETS)
files = get_image_files(path/"images")

def label_func(f): return f[0].isupper()

dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(64))

learn = vision_learner(dls, resnet18, metrics=accuracy)
learn.unfreeze()
learn.fit_one_cycle(5)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.714393 | 0.421342 | 0.841678 | 00:03 |
| 1 | 0.396920 | 0.254652 | 0.888363 | 00:03 |
| 2 | 0.228655 | 0.230342 | 0.907307 | 00:03 |
| 3 | 0.139857 | 0.181267 | 0.933018 | 00:03 |
| 4 | 0.078330 | 0.166232 | 0.935724 | 00:03 |
from copy import deepcopy

model = deepcopy(learn.model)

from fasterai.prune.all import *

pr_cb = PruneCallback(sparsity=25, context='local', criteria=large_final, schedule=one_cycle, layer_type=[nn.Conv2d])
learn.fit_one_cycle(3, cbs=pr_cb)
Pruning until a sparsity of [25]%
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.050833 | 0.203945 | 0.932341 | 00:04 |
| 1 | 0.157106 | 0.257223 | 0.897835 | 00:04 |
| 2 | 0.189360 | 0.263265 | 0.894452 | 00:04 |
Sparsity at the end of epoch 0: [5.2]%
Sparsity at the end of epoch 1: [24.15]%
Sparsity at the end of epoch 2: [25.0]%
Final Sparsity: [25.0]%
pruned_model = deepcopy(learn.model)

from fasterai.quantize.all import *

qt = Quantizer()
q_model = qt.quantize(learn.model.to('cpu'), dls)
/home/HubensN/miniconda3/envs/clean/lib/python3.9/site-packages/torch/ao/quantization/observer.py:221: UserWarning: Please use quant_min and quant_max to specify the range for observers. reduce_range will be deprecated in a future release of PyTorch.
warnings.warn(
compare_models([model, pruned_model, q_model], dls)
Valid Accuracy: 95.12 %
Valid Accuracy: 90.92 %
Valid Accuracy: 89.61 %
+---------------------+----------------+-------------------+-------------------+
|                     | Original Model | Pruned Model      | Quantized Model   |
+---------------------+----------------+-------------------+-------------------+
| Latency (ms/sample) | 3239.0         | 2640.8 (1.23x)    | 2151.0 (1.51x)    |
| Accuracy (%)        | 95.12          | 90.92 (-4.20%)    | 89.61 (-5.51%)    |
| Params (M)          | 11.7           | 6.69 (1.75x)      | *                 |
| Size (B)            | 46912066       | 26829378 (1.75x)  | 6827042 (6.87x)   |
| MACs (M)            | 149            | 86 (1.73x)        | *                 |
+---------------------+----------------+-------------------+-------------------+