Developing a WinRT component to create a video file using Media Foundation

After creating a GIF file in my previous article, I suggest using the power of WinRT to create a component that produces a video file from your drawings (for instance, from a canvas with JavaScript).

To do so, we will use Media Foundation COM components. Media Foundation is the next generation multimedia platform for Windows that enables developers, consumers, and content providers to embrace the new wave of premium content with enhanced robustness, unparalleled quality, and seamless interoperability.

And as I said before, Media Foundation is based on COM components. But thanks to C++ projects for Windows Store, we can create a WinRT component on top of Media Foundation in order to use it from our Javascript or .NET projects.

DavBlog

This WinRT component will be called VideoGenerator.

The VideoGenerator component

The VideoGenerator class will have a main method called AppendNewFrame. This method is used to add a new frame to the video.

The class requires the following information to work:

  • Frame’s width
  • Frame’s height
  • Delay between frames (in milliseconds)

The class itself can be like the following:

// WinRT component that encodes a sequence of raw frames into a video
// written to a random access stream.
public ref class VideoGenerator sealed
{
public:
    // width / height: size of every frame.
    // stream:         destination the encoded video is written to.
    // delay:          delay between two frames, in milliseconds.
    VideoGenerator(
        UINT32 width,
        UINT32 height,
        Windows::Storage::Streams::IRandomAccessStream^ stream,
        UINT32 delay);

    virtual ~VideoGenerator();

    // Adds one frame to the video.
    void AppendNewFrame(const Array<byte> ^videoFrameBuffer);

    // Closes the video; call it once all the frames have been appended.
    void Finalize();
};

Obviously, this class also requires private data to work:

// Private state of VideoGenerator describing the video being produced.
UINT32 videoWidth;      // Width of a frame
UINT32 videoHeight;     // Height of a frame
UINT32 fps;             // Number of frames per second
UINT32 bitRate;         // Bit rate requested from the encoder
UINT32 frameSize;       // videoWidth * videoHeight
GUID   encodingFormat;  // Output (encoded) video format
GUID   inputFormat;     // Format of the pixels handed to the component
  • videoWidth is the width of a frame
  • videoHeight is the height of a frame
  • fps is the number of frames per second
  • bitRate is the average bit rate of the encoded video, in bits per second
  • frameSize is equal to videoWidth * videoHeight
  • encodingFormat defines the encoder to use (WMV for us)
  • inputFormat defines the format of the pixels sent to this class (RGB32)

 

Initialization

Starting with this information, here is the constructor:

// Creates a generator that encodes RGB32 frames of width x height pixels to
// WMV3 and writes the result to `stream`.
//
// width, height: size of every frame.
// stream:        destination of the encoded video.
// delay:         display time of each frame, in milliseconds.
//
// Throws a Platform::Exception carrying the failing HRESULT when COM, Media
// Foundation or the sink writer cannot be initialized, so that callers never
// end up holding a half-initialized generator.
VideoGenerator::VideoGenerator(UINT32 width, UINT32 height,
    Windows::Storage::Streams::IRandomAccessStream^ stream, UINT32 delay)
{
    videoWidth = width;
    videoHeight = height;
    fps = 25;
    bitRate = 400000;
    frameSize = videoWidth * videoHeight;
    encodingFormat = MFVideoFormat_WMV3;
    inputFormat = MFVideoFormat_RGB32;

    // Start from a well-defined state whatever happens below.
    initiated = false;
    rtStart = 0;

    // Media Foundation expresses time in 100-nanosecond units, i.e. 10,000
    // units per millisecond. Widen before multiplying: the previous 32-bit
    // computation overflowed as soon as delay exceeded 429 ms.
    rtDuration = static_cast<UINT64>(delay) * 10000;

    HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
    if (SUCCEEDED(hr))
    {
        hr = MFStartup(MF_VERSION);
        if (SUCCEEDED(hr))
        {
            hr = InitializeSinkWriter(stream);
            if (FAILED(hr))
            {
                // Balance MFStartup; Media Foundation objects must be
                // released before the platform is shut down.
                sinkWriter.Reset();
                MFShutdown();
            }
        }
    }

    if (FAILED(hr))
    {
        throw Platform::Exception::CreateException(hr);
    }

    initiated = true;
}

 

The constructor needs to initialize the COM engine (CoInitializeEx) and Media Foundation (MFStartup) before it can create the sink writer.

To compile you will need the following includes:

#include <Windows.h>
#include <mfapi.h>
#include <mfidl.h>
#include <Mfreadwrite.h>
#include <mferror.h>
#include <wrlclient.h>
#include <memory>

Please note that the bigger the bitRate is the bigger the file will be.

The next step after initializing things is to create a sink writer (IMFSinkWriter) which is used to control writings to the output file.

The sink writer enables you to author media files by passing in uncompressed or encoded data. For example, you can use it to re-encode a video file, or to capture live video from a webcam to a file.

In our case we will use it by passing uncompressed data from .NET or javascript.

// Creates the sink writer on top of `stream` and configures its output
// (encoded) and input (uncompressed) media types from the member settings.
//
// Returns S_OK when the sink writer is ready to accept samples, otherwise the
// HRESULT of the first step that failed. Every step only runs when all the
// previous ones succeeded.
HRESULT VideoGenerator::InitializeSinkWriter(Windows::Storage::Streams::IRandomAccessStream^ stream)
{
    ComPtr<IMFAttributes> spAttr;
    ComPtr<IMFMediaType>  mediaTypeOut;
    ComPtr<IMFMediaType>  mediaTypeIn;
    ComPtr<IMFByteStream> spByteStream;

    // Wrap the WinRT stream into a Media Foundation byte stream.
    HRESULT hr = MFCreateMFByteStreamOnStreamEx(reinterpret_cast<IUnknown*>(stream), &spByteStream);

    // Create the sink writer. The results of the attribute store calls are
    // checked too: spAttr would be null if its creation failed.
    if (SUCCEEDED(hr))
    {
        hr = MFCreateAttributes(&spAttr, 10);
    }
    if (SUCCEEDED(hr))
    {
        hr = spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE);
    }
    if (SUCCEEDED(hr))
    {
        // The ".wmv" pseudo URL only selects the container format; the data
        // goes to the byte stream.
        hr = MFCreateSinkWriterFromURL(L".wmv", spByteStream.Get(), spAttr.Get(), &sinkWriter);
    }

    // Set the output media type.
    if (SUCCEEDED(hr))
    {
        hr = MFCreateMediaType(&mediaTypeOut);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeOut->SetGUID(MF_MT_SUBTYPE, encodingFormat);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, bitRate);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_FRAME_RATE, fps, 1);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    }
    if (SUCCEEDED(hr))
    {
        hr = sinkWriter->AddStream(mediaTypeOut.Get(), &streamIndex);
    }

    // Set the input media type.
    if (SUCCEEDED(hr))
    {
        hr = MFCreateMediaType(&mediaTypeIn);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeIn->SetGUID(MF_MT_SUBTYPE, inputFormat);
    }
    if (SUCCEEDED(hr))
    {
        hr = mediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeSize(mediaTypeIn.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_FRAME_RATE, fps, 1);
    }
    if (SUCCEEDED(hr))
    {
        hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    }
    if (SUCCEEDED(hr))
    {
        hr = sinkWriter->SetInputMediaType(streamIndex, mediaTypeIn.Get(), NULL);
    }

    // Tell the sink writer to start accepting data.
    if (SUCCEEDED(hr))
    {
        hr = sinkWriter->BeginWriting();
    }

    return hr;
}

Starting from a WinRT stream, you have to call the MFCreateMFByteStreamOnStreamEx function to wrap it in an IMFByteStream object. Then, using MFCreateSinkWriterFromURL, you can obtain the sink writer.

The rest of the code configures the output and input formats of the sink writer according to the defined parameters (the mediaTypeOut and mediaTypeIn objects).

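The mediaTypeOut is responsible for:

  • MF_MT_MAJOR_TYPE: The main type of the file (Video here)
  • MF_MT_SUBTYPE: The subtype (WMV3 here)
  • MF_MT_AVG_BITRATE: The average bit rate of the encoded video
  • MF_MT_INTERLACE_MODE: Interlace mode (Progressive for us)
  • MF_MT_FRAME_SIZE: The size of the frame (width and height)
  • MF_MT_FRAME_RATE: Frames per second
  • MF_MT_PIXEL_ASPECT_RATIO: Pixel aspect ratio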

The mediaTypeIn is responsible for:

  • MF_MT_MAJOR_TYPE: The main type of the file (Video here)
  • MF_MT_SUBTYPE: The subtype (RGB32 here)
  • MF_MT_INTERLACE_MODE: Interlace mode (Progressive for us)
  • MF_MT_FRAME_SIZE: The size of the frame (width and height)
  • MF_MT_FRAME_RATE: Frame per second
  • MF_MT_PIXEL_ASPECT_RATIO: Aspect ratio

Adding a new frame

Once we have a sink writer, we just have to append each new frame by sending a byte array to the component:

// Appends one frame to the video.
//
// videoFrameBuffer: pixels of the frame, 4 bytes per pixel, stored row by row.
//                   It must hold at least videoWidth * videoHeight pixels;
//                   any extra data is ignored.
//
// Throws:
//   - Platform::Exception (E_ILLEGAL_METHOD_CALL) when the generator is not
//     initialized or has already been finalized,
//   - Platform::InvalidArgumentException when the buffer is null or too small
//     for one frame,
//   - Platform::Exception with the failing HRESULT when the frame cannot be
//     written.
void VideoGenerator::AppendNewFrame(const Platform::Array<byte> ^videoFrameBuffer)
{
    if (!initiated)
    {
        throw Platform::Exception::CreateException(E_ILLEGAL_METHOD_CALL);
    }
    if (videoFrameBuffer == nullptr)
    {
        throw ref new Platform::InvalidArgumentException();
    }

    // WriteFrame always copies a full frame, so a shorter buffer would be read
    // past its end. Compute the requirement in 64 bits to avoid wrapping.
    const UINT64 requiredBytes = static_cast<UINT64>(videoWidth) * videoHeight * sizeof(DWORD);
    if (videoFrameBuffer->Length < requiredBytes)
    {
        throw ref new Platform::InvalidArgumentException();
    }

    // Safe: requiredBytes fits in the 32-bit Length checked above.
    const UINT32 pixelCount = static_cast<UINT32>(requiredBytes / sizeof(DWORD));
    const DWORD *source = reinterpret_cast<const DWORD *>(videoFrameBuffer->Data);
    std::unique_ptr<DWORD[]> target(new DWORD[pixelCount]);

    for (UINT32 index = 0; index < pixelCount; index++)
    {
        // Swap the first and third channels of the pixel and clear the top byte.
        const DWORD color = source[index];
        const DWORD b = (color & 0x00FF0000) >> 16;
        const DWORD g = (color & 0x0000FF00) >> 8;
        const DWORD r = (color & 0x000000FF);
        const DWORD pixel = (r << 16) | (g << 8) | b;

#if ARM
        // On ARM the rows are mirrored on the Y axis: row 0 becomes the last row.
        const UINT32 row = index / videoWidth;
        const UINT32 targetRow = videoHeight - row - 1;
        const UINT32 column = index - (row * videoWidth);
        target[(targetRow * videoWidth) + column] = pixel;
#else
        target[index] = pixel;
#endif
    }

    // Send frame to the sink writer.
    HRESULT hr = WriteFrame(target.get(), rtStart, rtDuration);
    if (FAILED(hr))
    {
        throw Platform::Exception::CreateException(hr);
    }

    // The next frame starts where this one ends.
    rtStart += rtDuration;
}

Before writing data, you have to prepare it because:

  • Data must be provided using RGB format and in my case (HTML5 canvas), data is stored using BGR format
  • On ARM devices (detected through the ARM preprocessor define), the data is stored starting from the last line (and not the first line), so you have to flip the rows along the Y axis

Then, when your data is ready, just call the following code to append it to the output file:

// Wraps one frame into a Media Foundation sample and hands it to the sink
// writer.
//
// videoFrameBuffer: videoWidth * videoHeight pixels of 4 bytes each.
// rtStart:          time stamp of the frame.
// rtDuration:       duration of the frame.
//
// Returns S_OK on success, MF_E_NOT_INITIALIZED when there is no sink writer,
// otherwise the HRESULT of the first step that failed.
HRESULT VideoGenerator::WriteFrame(
    DWORD *videoFrameBuffer,
    const LONGLONG& rtStart,        // Time stamp.
    const LONGLONG& rtDuration      // Frame duration.
    )
{
    // Without a sink writer there is nothing to write to (and dereferencing it
    // below would crash).
    if (!sinkWriter.Get())
    {
        return MF_E_NOT_INITIALIZED;
    }

    ComPtr<IMFSample> sample;
    ComPtr<IMFMediaBuffer> buffer;

    const LONG cbWidth = 4 * videoWidth;            // Bytes per row.
    const DWORD cbBuffer = cbWidth * videoHeight;   // Bytes per frame.

    BYTE *pData = nullptr;

    // Create a new memory buffer.
    HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &buffer);

    // Lock the buffer and copy the video frame to the buffer.
    if (SUCCEEDED(hr))
    {
        hr = buffer->Lock(&pData, nullptr, nullptr);
        if (SUCCEEDED(hr))
        {
            hr = MFCopyImage(
                pData,                                      // Destination buffer.
                cbWidth,                                    // Destination stride.
                reinterpret_cast<BYTE*>(videoFrameBuffer),  // First row in source image.
                cbWidth,                                    // Source stride.
                cbWidth,                                    // Image width in bytes.
                videoHeight                                 // Image height in pixels.
                );

            // Only unlock a buffer that was successfully locked.
            buffer->Unlock();
        }
    }

    // Set the data length of the buffer.
    if (SUCCEEDED(hr))
    {
        hr = buffer->SetCurrentLength(cbBuffer);
    }

    // Create a media sample and add the buffer to the sample.
    if (SUCCEEDED(hr))
    {
        hr = MFCreateSample(&sample);
    }
    if (SUCCEEDED(hr))
    {
        hr = sample->AddBuffer(buffer.Get());
    }

    // Set the time stamp and the duration.
    if (SUCCEEDED(hr))
    {
        hr = sample->SetSampleTime(rtStart);
    }
    if (SUCCEEDED(hr))
    {
        hr = sample->SetSampleDuration(rtDuration);
    }

    // Send the sample to the Sink Writer.
    if (SUCCEEDED(hr))
    {
        hr = sinkWriter->WriteSample(streamIndex, sample.Get());
    }

    return hr;
}

The code creates a sample, fills it with your data, and sets its time stamp and duration before writing the sample to the sink writer.

Closing and cleaning

When you are done, you just have to call the Finalize method (and for the sake of completeness, I also add the destructor here):

// Destructor: finalizes the video in case the caller did not call Finalize()
// explicitly. Finalize() does nothing when it has already run.
VideoGenerator::~VideoGenerator()
{
    this->Finalize();
}

// Completes the video and releases Media Foundation.
//
// Safe to call several times (and from the destructor): only the first call
// after a successful initialization does any work. Errors are not reported
// because this also runs during destruction.
void VideoGenerator::Finalize()
{
    if (!initiated)
        return;

    initiated = false;

    if (sinkWriter.Get())
    {
        // Flush the pending samples and complete the output.
        sinkWriter->Finalize();

        // Media Foundation objects must be released before MFShutdown; the
        // ComPtr member would otherwise outlive the platform.
        sinkWriter.Reset();
    }

    MFShutdown();
}

Finalization is just about closing the sink writer and releasing the Media Foundation API (MFShutdown).

Using VideoGenerator

VideoGenerator is a WinRT component, so you can easily use it from .NET or JavaScript. For instance, using JavaScript, the client code can look something like this:

// Size of the frames, delay between frames (in milliseconds) and number of
// canvases ("canvas0" ... "canvas49") to encode.
var frameWidth = 800;
var frameHeight = 600;
var frameDelay = 16;
var frameCount = 50;

// Ask the user where to save the video.
var picker = new Windows.Storage.Pickers.FileSavePicker();
picker.fileTypeChoices.insert(Flipflop.Tools.GetString("VideoFiles"), [".wmv"]);

picker.pickSaveFileAsync().then(function (file) {
    // The user cancelled the picker.
    if (!file) {
        return;
    }

    // Return the inner promise so that errors propagate along the chain.
    return file.openAsync(Windows.Storage.FileAccessMode.readWrite).then(function (stream) {
        var videoGenerator = null;

        try {
            videoGenerator = new VideoTools.VideoGenerator(frameWidth, frameHeight, stream, frameDelay);

            for (var commandsIndex = 0; commandsIndex < frameCount; commandsIndex++) {
                var canvas = document.getElementById("canvas" + commandsIndex);
                var context = canvas.getContext("2d");

                // Grab exactly one frame worth of pixels; the size must match
                // the one given to the generator.
                var data = context.getImageData(0, 0, frameWidth, frameHeight);

                var bytes = data.data;
                videoGenerator.appendNewFrame(bytes);
            }
        } finally {
            // Always close the video and the stream, even when a frame failed.
            if (videoGenerator) {
                videoGenerator.finalize();
            }
            stream.close();
        }
    });
});

Complete code

The complete VideoGenerator code can be found here.

Or copy/paste right here.

VideoGenerator.h

#pragma once
#include <Windows.h>
#include <mfapi.h>
#include <mfidl.h>
#include <Mfreadwrite.h>
#include <mferror.h>
#include <wrlclient.h>
#include <memory>

using namespace Platform;
using namespace Microsoft::WRL;

namespace VideoTools
{
    // WinRT component that encodes a sequence of raw frames into a video
    // written to a random access stream.
    public ref class VideoGenerator sealed
    {
    public:
        // width / height: size of every frame.
        // stream:         destination the encoded video is written to.
        // delay:          delay between two frames, in milliseconds.
        VideoGenerator(
            UINT32 width,
            UINT32 height,
            Windows::Storage::Streams::IRandomAccessStream^ stream,
            UINT32 delay);

        virtual ~VideoGenerator();

        // Adds one frame to the video.
        void AppendNewFrame(const Array<byte> ^videoFrameBuffer);

        // Closes the video; call it once all the frames have been appended.
        void Finalize();

    private:
        // Creates and configures the sink writer on top of the given stream.
        HRESULT InitializeSinkWriter(Windows::Storage::Streams::IRandomAccessStream^ stream);

        // Sends one frame to the sink writer.
        HRESULT WriteFrame(
            DWORD *videoFrameBuffer,
            const LONGLONG& rtStart,
            const LONGLONG& rtDuration);

        // Video settings.
        UINT32 videoWidth;
        UINT32 videoHeight;
        UINT32 fps;
        UINT32 bitRate;
        UINT32 frameSize;
        GUID   encodingFormat;
        GUID   inputFormat;

        // Sink writer and the index of its video stream.
        DWORD  streamIndex;
        ComPtr<IMFSinkWriter> sinkWriter;

        // True once the initialization succeeded and until Finalize() runs.
        bool   initiated;

        // Time stamp of the next frame and duration of one frame.
        LONGLONG rtStart;
        UINT64 rtDuration;
    };
}

VideoGenerator.cpp

#include "pch.h"
#include "VideoGenerator.h"

#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfuuid")

using namespace VideoTools;

VideoGenerator::VideoGenerator(UINT32 width, UINT32 height, 
Windows::Storage::Streams::
IRandomAccessStream^ stream, UINT32 delay) { videoWidth = width; videoHeight = height; fps = 25; bitRate = 400000; frameSize = videoWidth * videoHeight; encodingFormat = MFVideoFormat_WMV3; inputFormat = MFVideoFormat_RGB32; HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); if (SUCCEEDED(hr)) { hr = MFStartup(MF_VERSION); if (SUCCEEDED(hr)) { hr = InitializeSinkWriter(stream); if (SUCCEEDED(hr)) { initiated = true; rtStart = 0; rtDuration = (10000000 * delay) / 1000; } } } } VideoGenerator::~VideoGenerator() { Finalize(); } void VideoGenerator::Finalize() { if (!initiated) return; initiated = false; sinkWriter->Finalize(); MFShutdown(); } void VideoGenerator::AppendNewFrame(const Platform::Array<byte> ^videoFrameBuffer) { auto length = videoFrameBuffer->Length / sizeof(DWORD); DWORD *buffer = (DWORD *)(videoFrameBuffer->Data); std::unique_ptr<DWORD[]> target(new DWORD[length]); for (UINT32 index = 0; index < length; index++) { DWORD color = buffer[index]; BYTE b = (BYTE)((color & 0x00FF0000) >> 16); BYTE g = (BYTE)((color & 0x0000FF00) >> 8); BYTE r = (BYTE)((color & 0x000000FF)); #if ARM auto row = index / videoWidth; auto targetRow = videoHeight - row - 1; auto column = index - (row * videoWidth); target[(targetRow * videoWidth) + column] = (r << 16) + (g << 8) + b; #else target[index] = (r << 16) + (g << 8) + b; #endif } // Send frame to the sink writer. 
HRESULT hr = WriteFrame(target.get(), rtStart, rtDuration); if (FAILED(hr)) { throw Platform::Exception::CreateException(hr); } rtStart += rtDuration; } HRESULT VideoGenerator::InitializeSinkWriter(Windows::Storage::Streams::IRandomAccessStream^ stream) { ComPtr<IMFAttributes> spAttr; ComPtr<IMFMediaType> mediaTypeOut; ComPtr<IMFMediaType> mediaTypeIn; ComPtr<IMFByteStream> spByteStream; HRESULT hr = MFCreateMFByteStreamOnStreamEx((IUnknown*)stream, &spByteStream); if (SUCCEEDED(hr)) { MFCreateAttributes(&spAttr, 10); spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); hr = MFCreateSinkWriterFromURL(L".wmv", spByteStream.Get(), spAttr.Get(), &sinkWriter); } // Set the output media type. if (SUCCEEDED(hr)) { hr = MFCreateMediaType(&mediaTypeOut); } if (SUCCEEDED(hr)) { hr = mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); } if (SUCCEEDED(hr)) { hr = mediaTypeOut->SetGUID(MF_MT_SUBTYPE, encodingFormat); } if (SUCCEEDED(hr)) { hr = mediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, bitRate); } if (SUCCEEDED(hr)) { hr = mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); } if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight); } if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_FRAME_RATE, fps, 1); } if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); } if (SUCCEEDED(hr)) { hr = sinkWriter->AddStream(mediaTypeOut.Get(), &streamIndex); } // Set the input media type. 
if (SUCCEEDED(hr)) { hr = MFCreateMediaType(&mediaTypeIn); } if (SUCCEEDED(hr)) { hr = mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); } if (SUCCEEDED(hr)) { hr = mediaTypeIn->SetGUID(MF_MT_SUBTYPE, inputFormat); } if (SUCCEEDED(hr)) { hr = mediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); } if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(mediaTypeIn.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight); } if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_FRAME_RATE, fps, 1); } if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); } if (SUCCEEDED(hr)) { hr = sinkWriter->SetInputMediaType(streamIndex, mediaTypeIn.Get(), NULL); } // Tell the sink writer to start accepting data. if (SUCCEEDED(hr)) { hr = sinkWriter->BeginWriting(); } return hr; } HRESULT VideoGenerator::WriteFrame( DWORD *videoFrameBuffer, const LONGLONG& rtStart, // Time stamp. const LONGLONG& rtDuration // Frame duration. ) { ComPtr<IMFSample> sample; ComPtr<IMFMediaBuffer> buffer; const LONG cbWidth = 4 * videoWidth; const DWORD cbBuffer = cbWidth * videoHeight; BYTE *pData = NULL; // Create a new memory buffer. HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &buffer); // Lock the buffer and copy the video frame to the buffer. if (SUCCEEDED(hr)) { hr = buffer->Lock(&pData, NULL, NULL); } if (SUCCEEDED(hr)) { hr = MFCopyImage( pData, // Destination buffer. cbWidth, // Destination stride. (BYTE*)videoFrameBuffer, // First row in source image. cbWidth, // Source stride. cbWidth, // Image width in bytes. videoHeight // Image height in pixels. ); } if (buffer.Get()) { buffer->Unlock(); } // Set the data length of the buffer. if (SUCCEEDED(hr)) { hr = buffer->SetCurrentLength(cbBuffer); } // Create a media sample and add the buffer to the sample. if (SUCCEEDED(hr)) { hr = MFCreateSample(&sample); } if (SUCCEEDED(hr)) { hr = sample->AddBuffer(buffer.Get()); } // Set the time stamp and the duration. 
if (SUCCEEDED(hr)) { hr = sample->SetSampleTime(rtStart); } if (SUCCEEDED(hr)) { hr = sample->SetSampleDuration(rtDuration); } // Send the sample to the Sink Writer. if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(streamIndex, sample.Get()); } return hr; }

Going further

Some useful links to go further: