Train a RetinaNet using KerasCV and Weights & Biases¶
This notebook demonstrates how to build a simple pipeline for training an object detection model with KerasCV, and how to track and manage the experiment with Weights & Biases.
Original Source: https://keras.io/guides/keras_cv/object_detection_keras_cv
Install the Dependencies¶
In [ ]:
!pip install --upgrade -q git+https://github.com/keras-team/keras-cv
!pip install --upgrade -q git+https://github.com/soumik12345/wandb-addons
In [ ]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import keras_cv
from tqdm.auto import tqdm
import wandb
from wandb.keras import WandbMetricsLogger
from wandb_addons.keras.detection import WandBDetectionVisualizationCallback
Initialize a Weights & Biases Run and Set Up the Configs¶
In [ ]:
wandb.init(
    project="keras-cv-callbacks", entity="geekyrakshit", job_type="detection"
)

config = wandb.config
config.batch_size = 4
config.base_lr = 0.005
config.image_size = 640
config.model_name = "retinanet_resnet50_pascalvoc"
config.momentum = 0.9
config.global_clipnorm = 10.0
config.epochs = 3

class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
    "Total",
]
config.class_mapping = dict(zip(range(len(class_ids)), class_ids))
Set Up the Dataset Pipeline¶
In [ ]:
train_ds = tfds.load(
    "voc/2007",
    split="train+validation",
    with_info=False,
    shuffle_files=True,
)
train_ds = train_ds.concatenate(
    tfds.load(
        "voc/2012",
        split="train+validation",
        with_info=False,
        shuffle_files=True,
    )
)
eval_ds = tfds.load("voc/2007", split="test", with_info=False)
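Before writing the preprocessing function, it helps to peek at what a raw TFDS example contains: an `image` tensor plus an `objects` dict holding bounding boxes in relative `yxyx` format and integer labels. A minimal inspection sketch (the feature names follow the TFDS `voc` schema):

In [ ]:
# Sketch: inspect one raw example before any preprocessing.
sample = next(iter(train_ds.take(1)))
print(sample["image"].shape)       # (height, width, 3) uint8 image
print(sample["objects"]["bbox"])   # (num_boxes, 4) boxes in rel_yxyx format
print(sample["objects"]["label"])  # integer class labels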
In [ ]:
def unpackage_tfds_inputs(inputs, bounding_box_format):
    image = inputs["image"]
    boxes = keras_cv.bounding_box.convert_format(
        inputs["objects"]["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    bounding_boxes = {
        "classes": tf.cast(inputs["objects"]["label"], dtype=tf.float32),
        "boxes": tf.cast(boxes, dtype=tf.float32),
    }
    return {
        "images": tf.cast(image, tf.float32),
        "bounding_boxes": bounding_boxes,
    }


train_ds = train_ds.map(
    lambda inputs: unpackage_tfds_inputs(inputs, bounding_box_format="xywh"),
    num_parallel_calls=tf.data.AUTOTUNE,
)
eval_ds = eval_ds.map(
    lambda inputs: unpackage_tfds_inputs(inputs, bounding_box_format="xywh"),
    num_parallel_calls=tf.data.AUTOTUNE,
)
In [ ]:
train_ds = train_ds.ragged_batch(config.batch_size, drop_remainder=True)
eval_ds = eval_ds.ragged_batch(config.batch_size, drop_remainder=True)
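Since every image carries a different number of bounding boxes, the dataset is batched into ragged tensors rather than dense ones. You can confirm this with a quick, optional check of the element spec:

In [ ]:
# The bounding-box entries appear as RaggedTensorSpec because the number of
# boxes varies from image to image.
print(train_ds.element_spec)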
In [ ]:
augmenter = keras.Sequential(
    layers=[
        keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xywh"),
        keras_cv.layers.JitteredResize(
            target_size=(config.image_size, config.image_size),
            scale_factor=(0.75, 1.3),
            bounding_box_format="xywh",
        ),
    ]
)
train_ds = train_ds.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)
In [ ]:
inference_resizing = keras_cv.layers.Resizing(
    config.image_size,
    config.image_size,
    bounding_box_format="xywh",
    pad_to_aspect_ratio=True,
)
eval_ds = eval_ds.map(inference_resizing, num_parallel_calls=tf.data.AUTOTUNE)
In [ ]:
def dict_to_tuple(inputs):
    return inputs["images"], keras_cv.bounding_box.to_dense(
        inputs["bounding_boxes"], max_boxes=32
    )


train_ds = train_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
eval_ds = eval_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
eval_ds = eval_ds.prefetch(tf.data.AUTOTUNE)
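At this point the pipeline yields `(images, bounding_boxes)` tuples with boxes padded to a dense shape. To sanity-check it visually, you can render one batch with KerasCV's plotting utility. This is a minimal sketch assuming the `keras_cv.visualization.plot_bounding_box_gallery` API from recent KerasCV releases:

In [ ]:
# Sketch: visualize one prepared training batch (not part of the pipeline).
images, bounding_boxes = next(iter(train_ds.take(1)))
keras_cv.visualization.plot_bounding_box_gallery(
    images,
    value_range=(0, 255),
    rows=2,
    cols=2,
    y_true=bounding_boxes,
    scale=4,
    font_scale=0.7,
    bounding_box_format="xywh",
    class_mapping=config.class_mapping,
)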
Define and Compile the Model¶
In [ ]:
model = keras_cv.models.RetinaNet.from_preset(
    "resnet50_imagenet",
    num_classes=len(config.class_mapping),
    bounding_box_format="xywh",
)
print(model.prediction_decoder)

optimizer = keras.optimizers.SGD(
    learning_rate=config.base_lr,
    momentum=config.momentum,
    global_clipnorm=config.global_clipnorm,
)
model.compile(
    classification_loss="focal",
    box_loss="smoothl1",
    optimizer=optimizer,
    metrics=None,
)
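The `print(model.prediction_decoder)` call above shows the layer that turns raw network output into boxes, which defaults to non-max suppression. If you want to tune how aggressively detections are filtered, the decoder can be swapped out. A minimal sketch, assuming the `keras_cv.layers.NonMaxSuppression` layer available in recent KerasCV releases:

In [ ]:
# Sketch: replace the default prediction decoder. A higher confidence_threshold
# drops low-confidence boxes; a higher iou_threshold lets more overlapping
# detections survive.
model.prediction_decoder = keras_cv.layers.NonMaxSuppression(
    bounding_box_format="xywh",
    from_logits=True,
    iou_threshold=0.5,
    confidence_threshold=0.5,
)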
Start Training¶
In [ ]:
sampled_ds = train_ds.take(20)

model.fit(
    sampled_ds,
    validation_data=sampled_ds,
    epochs=config.epochs,
    callbacks=[
        WandbMetricsLogger(log_freq="batch"),
        WandBDetectionVisualizationCallback(
            dataset=sampled_ds,
            class_mapping=config.class_mapping,
            max_batches_to_visualize=2,
        ),
    ],
)

# Finish the experiment
wandb.finish()
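Once training is done, a quick way to inspect the model is to decode predictions on one evaluation batch. The output structure below ("boxes", "classes") reflects KerasCV's RetinaNet; treat this as a sketch rather than part of the tracked experiment:

In [ ]:
# Sketch: decoded predictions for one evaluation batch, in the same "xywh"
# format the model was built with.
images, _ = next(iter(eval_ds.take(1)))
predictions = model.predict(images)
print(predictions["boxes"].shape)    # (batch_size, max_detections, 4)
print(predictions["classes"].shape)  # (batch_size, max_detections)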