High-level library to help with training and evaluating neural networks in PyTorch flexibly and transparently.
if __name__ == "__main__":
    print("Let's have fun helping the PyTorch-Ignite open source project!")
Slides: https://pytorch-ignite.github.io/pydata-riyadh-2022-slides/
Priyansi @Priyansi | A CS undergrad, currently working on the docs of PyTorch-Ignite and helping manage the community
Victor @vfdev-5 | Software Engineer at Quansight working on AI-related open source projects
François @fco-dv | Software Engineer at IFPEN
Community-driven open source and NumFOCUS Affiliated Project
maintained by volunteers in the PyTorch community:
@vfdev-5, @ydcjeff, @KickItLikeShika, @sdesrozis, @alykhantejani, @anmolsjoshi,
@trsvchn, @fco-dv, @Priyansi, @Moh-Yakoub, @gucifer, @Ishan-Kumar2 ...
With the support of:
Google Summer of Code 2021
Google Season of Docs 2021
Hacktoberfest 2020 and 2021
PyData Global Mentored Sprint 2020 and 2021
Public meetings on Discord, open to everyone
Stay tuned for upcoming events …
Computer Vision example with Fashion MNIST
Problem: 1 - How to classify images?
model(image) -> predicted label
2 - How to measure model performance?
predicted labels vs correct labels
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
# Setup training/test data
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, transform=ToTensor())
batch_size = 64
# Create data loaders
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
# Optionally, for debugging:
for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break
# Output:
# Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
# Shape of y: torch.Size([64]) torch.int64
import torch
from torch import nn
device = "cuda" if torch.cuda.is_available() else "cpu"
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test(dataloader, model, loss_fn):
    # code to compute and print average loss and accuracy
    # (a possible implementation is sketched below)
    ...

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")
For NN training and evaluation in pure PyTorch:
model = Net()
train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
criterion = torch.nn.NLLLoss()
max_epochs = 10
validate_every = 100
checkpoint_every = 100
def validate(model, val_loader):
    model = model.eval()
    num_correct = 0
    num_examples = 0
    for batch in val_loader:
        input, target = batch
        output = model(input)
        correct = torch.eq(torch.round(output).type(target.type()), target).view(-1)
        num_correct += torch.sum(correct).item()
        num_examples += correct.shape[0]
    return num_correct / num_examples
def checkpoint(model, optimizer, checkpoint_dir):
    # ... (a possible implementation is sketched after the training loop)
    ...

def save_best_model(model, current_accuracy, best_accuracy):
    # ...
    ...
iteration = 0
best_accuracy = 0.0
for epoch in range(max_epochs):
    for batch in train_loader:
        model = model.train()
        optimizer.zero_grad()
        input, target = batch
        output = model(input)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if iteration % validate_every == 0:
            binary_accuracy = validate(model, val_loader)
            print("After {} iterations, binary accuracy = {:.2f}"
                  .format(iteration, binary_accuracy))
            save_best_model(model, binary_accuracy, best_accuracy)
        if iteration % checkpoint_every == 0:
            checkpoint(model, optimizer, checkpoint_dir)
        iteration += 1
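The checkpoint and save_best_model helpers are stubs on the slide. A minimal sketch of what they might do with torch.save (the file names are assumptions):

import os

def checkpoint(model, optimizer, checkpoint_dir):
    # save model and optimizer state so training can be resumed later
    torch.save(
        {"model": model.state_dict(), "optimizer": optimizer.state_dict()},
        os.path.join(checkpoint_dir, "checkpoint.pt"),
    )

def save_best_model(model, current_accuracy, best_accuracy):
    # keep the weights of the best model seen so far
    if current_accuracy > best_accuracy:
        torch.save(model.state_dict(), "best_model.pt")

Note that the hand-written loop above never updates best_accuracy, so every validation run looks like a new best: exactly the kind of subtle bookkeeping bug that motivates handing this logic over to a library.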
High-level library to help with training and evaluating neural networks in PyTorch flexibly and transparently.
With PyTorch-Ignite:
Let’s train a MNIST classifier with PyTorch-Ignite!
https://pytorch-ignite.ai/tutorials/beginner/01-getting-started/
Any questions before we go on?
Participating GitHub repositories:
➡️ PyTorch-Ignite - Library to help with training and evaluating neural networks
PyTorch-Ignite Examples repository - Examples, tutorials, and how-to guides
https://github.com/pytorch/ignite/blob/master/CONTRIBUTING.md#developing-ignite
https://github.com/pytorch/ignite/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
Participating GitHub repositories:
PyTorch-Ignite - Library to help with training and evaluating neural networks
➡️ PyTorch-Ignite Examples repository - Examples, tutorials, and how-to guides
Your feedback is valuable!
pip install pytorch-ignite
Thank you for participating in this sprint session! Follow us and check out our new website: https://pytorch-ignite.ai
We are looking for contributors to help out with the project.
Everyone is welcome to contribute
In its simplest form:

No more coding for/while loops on epochs and iterations. Users instantiate engines and run them.
from ignite.engine import Engine, Events, create_supervised_evaluator
from ignite.metrics import Accuracy

# Setup training engine:
def train_step(engine, batch):
    # Users can do whatever they need on a single iteration,
    # e.g. forward/backward pass for any number of models, optimizers, etc.
    ...

trainer = Engine(train_step)

# Setup single model evaluation engine
evaluator = create_supervised_evaluator(model, metrics={"accuracy": Accuracy()})

def validation():
    state = evaluator.run(validation_data_loader)
    # print computed metrics
    print(trainer.state.epoch, state.metrics)

# Run model's validation at the end of each epoch
trainer.add_event_handler(Events.EPOCH_COMPLETED, validation)

# Start the training
trainer.run(training_data_loader, max_epochs=100)
Handlers can be any function: e.g. lambda, simple function, class method, etc.
trainer.add_event_handler(Events.STARTED, lambda _: print("Start training"))

# attach a handler with args and kwargs
mydata = [1, 2, 3, 4]
logger = ...

def on_training_ended(data):
    print(f"Training has ended. mydata={data}")
    # Users can use variables from another scope
    logger.info("Training has ended")

trainer.add_event_handler(Events.COMPLETED, on_training_ended, mydata)

# call any number of functions on a single event
trainer.add_event_handler(Events.COMPLETED, lambda engine: print(engine.state.times))

@trainer.on(Events.ITERATION_COMPLETED)
def log_something(engine):
    print(engine.state.output)
# run the validation every 5 epochs
@trainer.on(Events.EPOCH_COMPLETED(every=5))
def run_validation():
    # run validation
    ...

@trainer.on(Events.COMPLETED | Events.EPOCH_COMPLETED(every=10))
def run_another_validation():
    # ...
    ...

# change some training variable once, on the 20th epoch
@trainer.on(Events.EPOCH_STARTED(once=20))
def change_training_variable():
    # ...
    ...

# trigger a handler with a custom, user-defined frequency
# (a sketch of first_x_iters is shown below)
@trainer.on(Events.ITERATION_COMPLETED(event_filter=first_x_iters))
def log_gradients():
    # ...
    ...
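The first_x_iters filter is not defined on the slide. An event_filter is simply a callable that takes the engine and the event number and returns a bool; a minimal sketch (x = 10 is an assumption):

def first_x_iters(engine, event):
    # fire only for the first 10 iterations
    return event <= 10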
from ignite.engine import EventEnum
# Define custom events
class BackpropEvents(EventEnum):
BACKWARD_STARTED = 'backward_started'
BACKWARD_COMPLETED = 'backward_completed'
OPTIM_STEP_COMPLETED = 'optim_step_completed'
def train_step(engine, batch):
# ...
loss = criterion(y_pred, y)
engine.fire_event(BackpropEvents.BACKWARD_STARTED)
loss.backward()
engine.fire_event(BackpropEvents.BACKWARD_COMPLETED)
optimizer.step()
engine.fire_event(BackpropEvents.OPTIM_STEP_COMPLETED)
# ...
trainer = Engine(train_step)
trainer.register_events(*BackpropEvents)
@trainer.on(BackpropEvents.BACKWARD_STARTED)
def function_before_backprop(engine):
# ...
50+ distributed-ready out-of-the-box metrics to easily evaluate models.
from ignite.metrics import Precision, Recall

precision = Precision(average=False)
recall = Recall(average=False)
F1_per_class = (precision * recall * 2 / (precision + recall))
F1_mean = F1_per_class.mean()  # torch mean method
F1_mean.attach(engine, "F1")
Run the same code across all supported backends seamlessly:
- native torch.distributed: nccl, gloo, mpi
- Horovod: gloo or nccl communication backend
- XLA on TPUs: pytorch/xla
import ignite.distributed as idist

def training(local_rank, *args, **kwargs):
    dataloader_train = idist.auto_dataloader(dataset, ...)
    model = ...
    model = idist.auto_model(model)
    optimizer = ...
    optimizer = idist.auto_optim(optimizer)

backend = 'nccl'  # or 'gloo', 'horovod', 'xla-tpu' or None
with idist.Parallel(backend) as parallel:
    parallel.run(training)
Handle distributed launchers with the same code:
- torch.multiprocessing.spawn
- torch.distributed.launch
- horovodrun
- slurm
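As a sketch of what this means in practice (the backend and process count here are assumptions): the same script can spawn its own workers or run under an external launcher, unchanged.

import ignite.distributed as idist

def training(local_rank):
    ...  # same training code as above

# Ignite spawns 4 processes itself, no external launcher needed:
with idist.Parallel(backend="nccl", nproc_per_node=4) as parallel:
    parallel.run(training)

# Under an external launcher, e.g.
#   python -m torch.distributed.launch --nproc_per_node=4 --use_env main.py
# drop nproc_per_node: idist.Parallel(backend="nccl") picks up the
# already-initialized distributed environment.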
High-level helper methods:
idist.auto_model()
idist.auto_optim()
idist.auto_dataloader()
Collective operations: all_reduce, all_gather, and more.

Any questions before we go on?
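A minimal sketch of a collective op inside a distributed training function, assuming the idist.Parallel setup shown above:

import torch
import ignite.distributed as idist

def training(local_rank):
    # each process holds a different value ...
    t = torch.tensor(float(idist.get_rank()))
    # ... and all_reduce sums it across all processes (default op is SUM)
    t = idist.all_reduce(t)
    print(f"rank {idist.get_rank()}: sum of all ranks = {t.item()}")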