Feature tracking

The dv-processing library provides several algorithm implementations for visual tracking of detected features. Feature tracking is intended for use in the frontends of visual odometry pipelines. While tracking on event input alone is feasible, the library also provides frame-based and hybrid (using both events and frames) trackers that allow building visual odometry pipelines which leverage both input modalities.

Frame-based tracking

Frame-based feature tracking is performed using the Lucas-Kanade tracking algorithm. The following code sample shows how to run the available frame-based tracker on a stream of frames coming from a live camera.

Note

This sample requires a camera that is capable of producing frames, e.g. a DAVIS series camera.

#include <dv-processing/features/feature_tracks.hpp>
#include <dv-processing/features/image_feature_lk_tracker.hpp>
#include <dv-processing/io/camera_capture.hpp>

#include <opencv2/highgui.hpp>

int main() {
    // Open any camera
    dv::io::CameraCapture capture;

    // Make sure it supports frame stream output, throw an error otherwise
    if (!capture.isFrameStreamAvailable()) {
        throw dv::exceptions::RuntimeError("Input camera does not provide a frame stream.");
    }

    const cv::Size resolution = capture.getFrameResolution().value();

    // Initialize a preview window
    cv::namedWindow("Preview", cv::WINDOW_NORMAL);

    // Instantiate a visual tracker with known resolution, all parameters kept default
    auto tracker = dv::features::ImageFeatureLKTracker::RegularTracker(resolution);

    // Create a track container instance that is used to visualize tracks on an image
    dv::features::FeatureTracks tracks;

    // Run the frame processing while the camera is connected
    while (capture.isRunning()) {
        // Try to receive a frame, check if anything was received
        if (const auto frame = capture.getNextFrame()) {
            // Pass the frame to the tracker
            tracker->accept(*frame);

            // Run tracking
            const auto result = tracker->runTracking();

            // Pass tracking result into the track container which aggregates track history
            tracks.accept(result);

            // Generate and show a preview of recent tracking history
            cv::imshow("Preview", tracks.visualize(frame->image));
        }
        cv::waitKey(2);
    }

    return 0;
}
_images/image_lk_tracking.png

Tracked features on a live frame from a camera.
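
The same tracker also works offline on recorded data. The following is a minimal sketch of that variant, assuming an aedat4 recording containing a frame stream and read through dv::io::MonoCameraRecording; the file path is a placeholder.

#include <dv-processing/features/feature_tracks.hpp>
#include <dv-processing/features/image_feature_lk_tracker.hpp>
#include <dv-processing/io/mono_camera_recording.hpp>

#include <opencv2/highgui.hpp>

int main() {
    // Open a recording file; replace the path with an actual recording
    dv::io::MonoCameraRecording reader("path/to/recording.aedat4");

    // Make sure the file contains a frame stream, throw an error otherwise
    if (!reader.isFrameStreamAvailable()) {
        throw dv::exceptions::RuntimeError("Input file does not contain a frame stream.");
    }

    const cv::Size resolution = reader.getFrameResolution().value();

    // Tracker and track container are set up exactly as in the live-camera sample
    auto tracker = dv::features::ImageFeatureLKTracker::RegularTracker(resolution);
    dv::features::FeatureTracks tracks;

    cv::namedWindow("Preview", cv::WINDOW_NORMAL);

    // Read frames until the end of the recording is reached
    while (reader.isRunning()) {
        if (const auto frame = reader.getNextFrame()) {
            tracker->accept(*frame);
            if (const auto result = tracker->runTracking(); result) {
                tracks.accept(result);
            }
            cv::imshow("Preview", tracks.visualize(frame->image));
            cv::waitKey(2);
        }
    }

    return 0;
}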

Event-based tracking

Event-based Lucas-Kanade tracker

Features can be detected and tracked on a stream of events. The dv::features::EventFeatureLKTracker can perform this: it internally accumulates frames from the events, runs feature detection on them, and performs Lucas-Kanade tracking on the accumulated frames.

The following sample code shows how to use the event-only Lucas-Kanade tracker on an event stream coming from a live camera.

#include <dv-processing/features/event_feature_lk_tracker.hpp>
#include <dv-processing/features/feature_tracks.hpp>
#include <dv-processing/io/camera_capture.hpp>

#include <opencv2/highgui.hpp>

int main() {
    // Open any camera
    dv::io::CameraCapture capture;

    // Make sure it supports event stream output, throw an error otherwise
    if (!capture.isEventStreamAvailable()) {
        throw dv::exceptions::RuntimeError("Input camera does not provide an event stream.");
    }

    const cv::Size resolution = capture.getEventResolution().value();

    // Initialize a preview window
    cv::namedWindow("Preview", cv::WINDOW_NORMAL);

    // Instantiate a visual tracker with known resolution, all parameters kept default
    auto tracker = dv::features::EventFeatureLKTracker<>::RegularTracker(resolution);

    // Run tracking by accumulating frames at 100 FPS
    tracker->setFramerate(100);

    // Create a track container instance that is used to visualize tracks on an image
    dv::features::FeatureTracks tracks;

    // Run the event processing while the camera is connected
    while (capture.isRunning()) {
        // Try to receive a batch of events, check if anything was received
        if (const auto events = capture.getNextEventBatch()) {
            // Pass the events to the tracker
            tracker->accept(*events);

            // Run tracking
            const auto result = tracker->runTracking();

            // Since we are passing events in fine-grained batches, tracking will not execute
            // until enough events are received; a null pointer is returned if tracking did not execute
            if (!result) {
                continue;
            }

            // Pass tracking result into the track container which aggregates track history
            tracks.accept(result);

            // Generate and show a preview of recent tracking history
            cv::imshow("Preview", tracks.visualize(tracker->getAccumulatedFrame()));
        }
        cv::waitKey(2);
    }

    return 0;
}
_images/event_lk_tracking.png

Tracked features on a stream of events from a camera.
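
The empty angle brackets in EventFeatureLKTracker<> expose a template parameter that selects the accumulator used internally to build frames from the events. As a hedged sketch only (the default accumulator type and the exact factory signature may differ between library versions), explicitly selecting dv::Accumulator, the configurable frame accumulator from dv-processing, could look like this:

#include <dv-processing/core/frame.hpp>
#include <dv-processing/features/event_feature_lk_tracker.hpp>

int main() {
    const cv::Size resolution(640, 480);

    // Assumption: dv::Accumulator satisfies the tracker's accumulator template
    // parameter; the tracker then reconstructs frames using this accumulator type
    auto tracker = dv::features::EventFeatureLKTracker<dv::Accumulator>::RegularTracker(resolution);

    // ... feed events with tracker->accept(...) and call tracker->runTracking()
    // exactly as in the sample above

    return 0;
}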

Event-based mean shift tracker

Features can also be detected and tracked on a stream of events using the mean shift algorithm. Although mean shift is commonly used for clustering, the dv::features::MeanShiftTracker class provides a tracking implementation on event data based on the mean shift update. The class internally detects interesting features to track from events (by default it uses dv::features::EventBlobDetector) and tracks them by running a mean shift update on a normalized time surface of the events. Tracking follows the interesting points detected on the time surface; because the location update weights each pixel by the time surface intensity, the algorithm shifts the tracks towards the most recent events.

The algorithm can be summarized as follows:

  1. Given a set of events, detect interesting blobs using dv::features::EventBlobDetector. (This step runs only if no tracks have been initialized yet or if redetection is enabled.)

  2. Compute the normalized time surface representation of the events over the configured interval duration.

  3. Given a set of input track locations, for each non-converged track retrieve the time surface of events within a configured window.

  4. Calculate the mean of coordinates for the retrieved neighborhood, weighting each coordinate by the time surface intensity value.

  5. Shift the current track location by the mode: the vector from the current point to the weighted mean, scaled by a learning rate factor (see the sketch after this list).

  6. If the magnitude of the mode vector is below a configured threshold, the track is considered to have converged to its new position; otherwise, repeat from step 3.
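
The core of the method is the weighted-mean shift in steps 4 and 5. The following is an illustrative sketch only, not the library's implementation; the helper name meanShiftStep and the exact weighting details are assumptions.

#include <opencv2/core.hpp>

// Single mean shift update for one track (steps 4 and 5). `timeSurface` is a
// normalized time surface (CV_32FC1, values in [0, 1]), `center` the current track
// location, `bandwidth` the search window half-size, `learningRate` in (0, 1]
cv::Point2f meanShiftStep(
    const cv::Mat &timeSurface, const cv::Point2f &center, const int bandwidth, const float learningRate) {
    cv::Point2f weightedMean(0.f, 0.f);
    float sumWeights = 0.f;

    // Visit every pixel of the search window around the current track location
    for (int dy = -bandwidth; dy <= bandwidth; dy++) {
        for (int dx = -bandwidth; dx <= bandwidth; dx++) {
            const int x = static_cast<int>(center.x) + dx;
            const int y = static_cast<int>(center.y) + dy;
            if (x < 0 || y < 0 || x >= timeSurface.cols || y >= timeSurface.rows) {
                continue;
            }

            // Step 4: weight each coordinate by the time surface intensity, so that
            // recent events pull the mean harder than stale ones
            const float weight = timeSurface.at<float>(y, x);
            weightedMean += cv::Point2f(static_cast<float>(x), static_cast<float>(y)) * weight;
            sumWeights   += weight;
        }
    }

    if (sumWeights <= 0.f) {
        // No events in the window, leave the track where it is
        return center;
    }
    weightedMean /= sumWeights;

    // Step 5: the mode is the vector from the current location to the weighted mean,
    // applied scaled by the learning rate. Convergence (step 6) would compare the
    // magnitude of this shift against a threshold
    return center + (weightedMean - center) * learningRate;
}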

This algorithm is useful for tracking event blobs that can serve as points of interest in event processing algorithms.

The following code sample shows how to use our mean shift tracker implementation to detect and track blobs on synthetically generated event data.

#include <dv-processing/core/event.hpp>
#include <dv-processing/data/generate.hpp>
#include <dv-processing/features/mean_shift_tracker.hpp>
#include <dv-processing/visualization/events_visualizer.hpp>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

[[nodiscard]] dv::EventStore generateEventClustersAtTime(const int64_t time, const std::vector<dv::Point2f> &clusters,
    const uint64_t numIter, const cv::Size &resolution, const int shift = -5);

int main() {
    using namespace std::chrono_literals;

    // Use VGA resolution
    const cv::Size resolution(640, 480);

    // Initialize a slicer
    dv::EventStreamSlicer slicer;

    // Initialize a preview window
    cv::namedWindow("Preview", cv::WINDOW_NORMAL);

    // Initialize a list of cluster centers for synthetic data generation
    const std::vector<dv::Point2f> clusters(
        {dv::Point2f(550.f, 400.f), dv::Point2f(70.f, 300.f), dv::Point2f(305.f, 100.f)});

    // Generate some random events for a background
    dv::EventStore events = dv::data::generate::uniformlyDistributedEvents(0, resolution, 10'000);

    std::vector<int64_t> timestamps = {0, 40000, 80000, 120000, 160000, 200000, 240000, 280000, 320000, 360000};

    uint64_t numIter = 0;
    for (const auto time : timestamps) {
        auto eventCluster = generateEventClustersAtTime(time, clusters, numIter, resolution);
        events            += eventCluster;
        events            += dv::data::generate::uniformlyDistributedEvents(time, resolution, 10'000, numIter);
        numIter++;
    }

    // Bandwidth value defining the size of the search window in which the updated track location is searched
    const int bandwidth = 10;

    // Time window used for the normalized time surface computation. In this case we take the last 50ms of events
    // and compute a normalized time surface over them
    const dv::Duration timeWindow = 50ms;

    // Initialize a mean shift tracker
    dv::features::MeanShiftTracker meanShift = dv::features::MeanShiftTracker(resolution, bandwidth, timeWindow);

    dv::visualization::EventVisualizer visualizer(resolution);

    // Register a callback every 40 milliseconds
    slicer.doEveryTimeInterval(40ms, [&](const dv::EventStore &events) {
        meanShift.accept(events);
        auto meanShiftTracks = meanShift.runTracking();

        if (!meanShiftTracks) {
            return;
        }

        // Visualize mean shift tracks
        auto preview = visualizer.generateImage(events);
        auto points  = dv::data::fromTimedKeyPoints(meanShiftTracks->keypoints);
        cv::drawKeypoints(preview, points, preview, dv::visualization::colors::red);

        cv::imshow("Preview", preview);
        cv::waitKey(300);
    });

    slicer.accept(events);

    return EXIT_SUCCESS;
}

dv::EventStore generateEventClustersAtTime(const int64_t time, const std::vector<dv::Point2f> &clusters,
    const uint64_t numIter, const cv::Size &resolution, const int shift) {
    // Declare a region filter which we will use to filter out-of-bounds events in the next step
    dv::EventRegionFilter filter(cv::Rect(0, 0, resolution.width, resolution.height));
    const float offset = static_cast<float>(shift * static_cast<int>(numIter));
    dv::EventStore eventFiltered;
    for (const auto &cluster : clusters) {
        const auto xShift       = cluster.x() + offset;
        const auto yShift       = cluster.y() + offset;
        const dv::Point2f point = dv::Point2f(xShift, yShift);
        // Generate a batch of normally distributed events around each of the cluster centers
        filter.accept(dv::data::generate::normallyDistributedEvents(time, point, dv::Point2f(3.f, 3.f), 1'000));

        // Apply the region filter to discard events outside the valid dimensions
        eventFiltered += filter.generateEvents();
    }

    return eventFiltered;
}
_images/tracker_preview.png

Expected output of the mean shift tracker sample. The three synthetically generated blobs are tracked and marked in red.
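
Besides visualizing them, the tracking results can also be consumed directly: runTracking returns a result object whose keypoints vector holds dv::TimedKeyPoint entries, as already used above via meanShiftTracks->keypoints. The following small usage sketch could be placed inside the slicer callback, after the null check; it assumes the pt, class_id and timestamp fields of dv::TimedKeyPoint, where class_id carries the persistent track identifier.

// Requires #include <iostream>. Print the current position of each track;
// class_id identifies a track across consecutive tracking results
for (const auto &kp : meanShiftTracks->keypoints) {
    std::cout << "Track " << kp.class_id << " at (" << kp.pt.x() << ", " << kp.pt.y() << ")"
              << " timestamp " << kp.timestamp << std::endl;
}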

Hybrid tracking

Events allow tracking at a much higher rate than the frame stream provides, which suggests that feature tracking on frames can be improved by also tracking the features across intermediate frames accumulated from events between each pair of actual frames. These intermediate tracking results can then serve as a prior for the frame tracking algorithm. Such an approach is implemented in dv::features::EventCombinedLKTracker: it performs regular Lucas-Kanade tracking on frames, but also constructs intermediate accumulated frames to predict the track locations in the next frame and passes these predictions as a prior to the Lucas-Kanade tracking algorithm.

The following sample code shows how to use the hybrid event-frame Lucas-Kanade tracker on both streams coming from a live camera.

Note

This sample requires a camera that is capable of producing frames and events, e.g. a DAVIS series camera.

#include <dv-processing/features/event_combined_lk_tracker.hpp>
#include <dv-processing/features/feature_tracks.hpp>
#include <dv-processing/io/camera_capture.hpp>

#include <opencv2/highgui.hpp>

#include <queue>

int main() {
    // Open any camera
    dv::io::CameraCapture capture;

    // Make sure it supports the required stream outputs, throw an error otherwise
    if (!capture.isEventStreamAvailable()) {
        throw dv::exceptions::RuntimeError("Input camera does not provide an event stream.");
    }
    if (!capture.isFrameStreamAvailable()) {
        throw dv::exceptions::RuntimeError("Input camera does not provide a frame stream.");
    }

    const cv::Size resolution = capture.getEventResolution().value();

    // Initialize a preview window
    cv::namedWindow("Preview", cv::WINDOW_NORMAL);

    // Instantiate a visual tracker with known resolution, all parameters kept default
    auto tracker = dv::features::EventCombinedLKTracker<>::RegularTracker(resolution);

    // Accumulate and track on 5 intermediate accumulated frames between each actual frame pair
    tracker->setNumIntermediateFrames(5);

    // Create a track container instance that is used to visualize tracks on an image
    dv::features::FeatureTracks tracks;

    // Use a queue to store incoming frames, to make sure all event data has arrived prior to running the tracking
    std::queue<dv::Frame> frameQueue;

    // Run the processing while the camera is connected
    while (capture.isRunning()) {
        // Try to receive a frame, check if anything was received
        if (const auto frame = capture.getNextFrame()) {
            // Push the received frame into the frame queue
            frameQueue.push(*frame);
        }

        // Try to receive a batch of events, check if anything was received
        if (const auto events = capture.getNextEventBatch()) {
            // Pass the events to the tracker
            tracker->accept(*events);

            // Check whether a frame is ready and whether enough events have already arrived
            if (frameQueue.empty() || frameQueue.front().timestamp > events->getHighestTime()) {
                continue;
            }

            // Take the oldest frame from the queue
            const auto frame = frameQueue.front();

            // Pass it to the tracker as well
            tracker->accept(frame);

            // Remove the used frame from the queue
            frameQueue.pop();

            // Run tracking
            const auto result = tracker->runTracking();

            // Validate that the tracking was successful
            if (!result) {
                continue;
            }

            // Pass tracking result into the track container which aggregates track history
            tracks.accept(result);

            // Generate and show a preview of recent tracking history on both an accumulated frame and the frame image
            // Take the set of intermediate accumulated frames from the tracker
            const auto accumulatedFrames = tracker->getAccumulatedFrames();
            if (!accumulatedFrames.empty()) {
                cv::Mat preview;
                // Draw the visualization on both images and concatenate them horizontally
                cv::hconcat(
                    tracks.visualize(accumulatedFrames.back().pyramid.front()), tracks.visualize(frame.image), preview);
                // Show the final preview image
                cv::imshow("Preview", preview);
            }
        }

        cv::waitKey(2);
    }

    return 0;
}
_images/combined_lk_tracking.png

Tracked features on frame and event streams from a camera.