Skip to content

retinanet

retinanet

niceml implementation of RetinaNet. This is a modified version of the original implementation:

https://github.com/keras-team/keras-io/blob/master/examples/vision/retinanet.py

Classes

RetinaNetFactory

RetinaNetFactory(
    use_scale_lambda=True,
    allow_preconvolution=False,
    additional_conv_layers=None,
)

Bases: ModelFactory

Model factory which creates a RetinaNet for object detection.

Source code in niceml/dlframeworks/keras/models/retinanet.py
def __init__(
    self,
    use_scale_lambda: bool = True,
    allow_preconvolution: bool = False,
    additional_conv_layers: Optional[List[int]] = None,
):
    """Store the factory configuration on the instance.

    Each argument is kept unchanged as an attribute of the same name; no
    validation or conversion happens here.

    Args:
        use_scale_lambda: Boolean switch, defaults to ``True``.
            NOTE(review): presumably toggles an input-scaling Lambda layer
            in the created model - confirm where the attribute is read.
        allow_preconvolution: Boolean switch, defaults to ``False``.
            NOTE(review): the consuming code is not visible in this
            section - confirm its exact effect.
        additional_conv_layers: Optional list of integers; ``None`` (the
            default) is stored as ``None``.
            NOTE(review): presumably one entry per extra convolution
            layer - confirm against the model-building code.
    """
    self.use_scale_lambda = use_scale_lambda
    self.allow_preconvolution = allow_preconvolution
    self.additional_conv_layers = additional_conv_layers

Functions

build_head

build_head(output_filters, bias_init, layer_count=256)

Builds the class/box predictions head.

Parameters:

  • output_filters

    Number of convolution filters in the final layer.

  • bias_init

    Bias Initializer for the final convolution layer.

  • layer_count (int, default: 256 ) –

    number of channels of the head input and number of filters of each intermediate convolution

Returns:

  • A keras sequential model representing either the classification or the box regression head depending on output_filters.

Source code in niceml/dlframeworks/keras/models/retinanet.py
def build_head(output_filters, bias_init, layer_count: int = 256):
    """Assemble the shared prediction head used for class or box outputs.

    The head takes a feature map with ``layer_count`` channels and arbitrary
    spatial size, applies four 3x3 "same"-padded convolutions (each followed
    by a ReLU) and finishes with one 3x3, stride-1 convolution that produces
    ``output_filters`` channels.

    Arguments:
      output_filters: Number of convolution filters in the final layer.
      bias_init: Bias initializer for the final convolution layer.
      layer_count: Channel count of the expected input and number of filters
        of each of the four intermediate convolutions.

    Returns:
      A keras sequential model representing either the classification
        or the box regression head depending on `output_filters`.
    """
    # One initializer instance is shared by all convolution kernels of the head.
    weight_init = tf.initializers.RandomNormal(0.0, 0.01)

    # The input placeholder comes first, so the Sequential model knows its
    # input signature (any height/width, `layer_count` channels).
    stack = [keras.Input(shape=[None, None, layer_count])]
    for _ in range(4):
        stack.append(
            keras.layers.Conv2D(
                filters=layer_count,
                kernel_size=3,
                padding="same",
                kernel_initializer=weight_init,
            )
        )
        stack.append(keras.layers.ReLU())

    # Final prediction layer; only this layer gets the caller's bias initializer.
    stack.append(
        keras.layers.Conv2D(
            filters=output_filters,
            kernel_size=3,
            strides=1,
            padding="same",
            kernel_initializer=weight_init,
            bias_initializer=bias_init,
        )
    )
    return keras.Sequential(stack)

feature_pyramid

feature_pyramid(
    layer_scaled_x,
    layer_scaled_2x,
    layer_scaled_4x,
    filter_count=256,
)

creates a feature pyramid

Source code in niceml/dlframeworks/keras/models/retinanet.py
def feature_pyramid(
    layer_scaled_x, layer_scaled_2x, layer_scaled_4x, filter_count: int = 256
):
    """Create a five-level feature pyramid from three feature maps.

    Every input is first projected to ``filter_count`` channels with a 1x1
    convolution. The coarser levels are then upsampled by a factor of two and
    added to the next finer level (top-down pathway), after which each of the
    three levels is passed through a 3x3 convolution. Two further levels are
    derived with stride-2 3x3 convolutions; a ReLU is applied before the
    last one.

    Because of the 2x upsampling followed by an addition, each input has to
    have twice the spatial resolution of the next one.

    :param layer_scaled_x: finest feature map
    :param layer_scaled_2x: feature map that is added to after 2x upsampling
           of the coarsest level
    :param layer_scaled_4x: coarsest input feature map
    :param filter_count: number of filters of every convolution in the pyramid
    :return: list with five tensors, ordered from the finest to the
             coarsest level
    """
    # NOTE: layers are instantiated in the same order as before so the
    # auto-generated Keras layer names stay unchanged.
    lateral_fine, lateral_mid, lateral_coarse = (
        layers.Conv2D(filter_count, kernel_size=1, strides=1, padding="same")
        for _ in range(3)
    )
    smooth_fine, smooth_mid, smooth_coarse = (
        layers.Conv2D(filter_count, kernel_size=3, strides=1, padding="same")
        for _ in range(3)
    )
    reduce_8x, reduce_16x = (
        layers.Conv2D(filter_count, kernel_size=3, strides=2, padding="same")
        for _ in range(2)
    )
    # A single upsampling layer instance is reused for both merges.
    upsample = layers.UpSampling2D(2)

    # Lateral 1x1 projections.
    level_x = lateral_fine(layer_scaled_x)
    level_2x = lateral_mid(layer_scaled_2x)
    level_4x = lateral_coarse(layer_scaled_4x)

    # Top-down pathway: merge coarse information into the finer levels.
    level_2x = level_2x + upsample(level_4x)
    level_x = level_x + upsample(level_2x)

    # 3x3 convolutions on the merged maps.
    level_x = smooth_fine(level_x)
    level_2x = smooth_mid(level_2x)
    level_4x = smooth_coarse(level_4x)

    # Two extra, coarser levels created by strided convolutions.
    level_8x = reduce_8x(level_4x)
    level_16x = reduce_16x(tf.nn.relu(level_8x))

    return [level_x, level_2x, level_4x, level_8x, level_16x]

get_backbone

get_backbone(input_size)

Builds ResNet50 with pre-trained imagenet weights

Source code in niceml/dlframeworks/keras/models/retinanet.py
def get_backbone(input_size: ImageSize) -> Model:
    """Build a ResNet50 feature extractor exposing three intermediate outputs.

    The classification top is omitted and no ``weights`` argument is passed,
    so ResNet50 uses its default weights (pre-trained ImageNet weights in
    ``keras.applications``).

    :param input_size: spatial size of the input images; a channel dimension
           of 3 is appended to ``input_size.to_numpy_shape()``
    :return: keras model mapping the backbone input to the outputs of the
             layers ``conv3_block4_out``, ``conv4_block6_out`` and
             ``conv5_block3_out`` (in this order)
    """
    resnet = ResNet50(
        include_top=False,
        # Spatial shape plus the channel axis.
        input_shape=input_size.to_numpy_shape() + (3,),
    )
    stage_layer_names = ("conv3_block4_out", "conv4_block6_out", "conv5_block3_out")
    stage_outputs = [resnet.get_layer(name).output for name in stage_layer_names]
    return keras.Model(inputs=[resnet.inputs], outputs=stage_outputs)

retina_net

retina_net(
    feature_layers,
    num_classes,
    anchor_per_cell,
    coordinates_count,
    anchor_feature_count_list,
)

Builds the heads of the feature_layers and returns one output tensor

Parameters:

  • feature_layers – tensors with all feature maps
  • num_classes – count of classes
  • anchor_per_cell – how many anchors are generated per feature cell
  • coordinates_count – how many coordinates are required to represent the object (e.g. bounding box)
  • anchor_feature_count_list – a list of anchors per feature map

Returns:

  • output_tensor with shape [batch_size, num_anchors, coordinates_count + num_classes]

Source code in niceml/dlframeworks/keras/models/retinanet.py
def retina_net(
    feature_layers: list,
    num_classes: int,
    anchor_per_cell: int,
    coordinates_count: int,
    anchor_feature_count_list: List[int],
):
    """
    Builds the heads of the feature_layers and returns one output tensor

    One classification head and one box regression head are created and
    shared across all feature maps. The per-feature-map predictions are
    reshaped to ``[-1, anchor_count, ...]``, concatenated along the anchor
    axis and finally joined so that the box coordinates come first and the
    class scores last.

    :param feature_layers: tensors with all feature maps
    :param num_classes: count of classes
    :param anchor_per_cell: how many anchors are generated per feature cell
    :param coordinates_count: how many coordinates are required to
           represent the object (e.g. bounding box)
    :param anchor_feature_count_list: a list of anchors per feature map;
           must have exactly one entry per element of ``feature_layers``
    :return: output_tensor with shape [batch_size, num_anchors, coordinates_count + num_classes]
    :raises ValueError: if ``feature_layers`` and
            ``anchor_feature_count_list`` differ in length
    """
    # Validate before any layer is created. An explicit exception is used
    # instead of `assert`, because asserts are removed under `python -O` and
    # `zip` below would then silently ignore the surplus entries.
    if len(feature_layers) != len(anchor_feature_count_list):
        raise ValueError(
            "feature_layers and anchor_feature_count_list must have the same "
            f"length, got {len(feature_layers)} and "
            f"{len(anchor_feature_count_list)}"
        )

    # Bias of the final classification layer, computed as -log((1 - p) / p)
    # with p = 0.01.
    prior_probability = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
    cls_head = build_head(anchor_per_cell * num_classes, prior_probability)
    box_head = build_head(anchor_per_cell * coordinates_count, "zeros")

    cls_outputs = []
    box_outputs = []

    for feature, cur_anchor_count in zip(feature_layers, anchor_feature_count_list):
        # Flatten the spatial/anchor dimensions into a single anchor axis.
        cur_box_head = box_head(feature)
        box_outputs.append(
            tf.reshape(cur_box_head, [-1, cur_anchor_count, coordinates_count])
        )
        cur_cls_head = cls_head(feature)
        cls_outputs.append(
            tf.reshape(cur_cls_head, [-1, cur_anchor_count, num_classes])
        )

    # Stack the anchors of all feature maps, then append the class scores to
    # the box coordinates along the last axis.
    cls_outputs = tf.concat(cls_outputs, axis=1)
    box_outputs = tf.concat(box_outputs, axis=1)
    return tf.concat([box_outputs, cls_outputs], axis=-1)