DirectCompute is Microsoft's counterpart to OpenCL (Open Computing Language). It allows general-purpose processing to be done on the GPU (Graphics Processing Unit), which is
typically used for highly parallel vector math.
The code and explanations provided are from my own personal experience with DirectCompute 5.0. I am not a Microsoft representative, nor do I work for Microsoft.
To set up a DirectCompute context, first the GPU must be enumerated. This is done very much in the same way that DirectX 11.1 (it shares the same API) sets up a GPU.
The Microsoft documentation is at:
http://msdn.microsoft.com/en-us/library/windows/desktop/hh309466(v=vs.85).aspx
The data types using the DirectX 11.1 API are:
- ID3D11Device1 - A class that defines the actual GPU device (the 1 means DirectX 11.1)
- ID3D11DeviceContext1 - A class that connects CPU-side programs with the GPU-side programs
- ID3D11ComputeShader - A class that stores the compute shader (program)
- ID3D11Buffer - A class that stores a block of memory on the GPU
- ID3D11UnorderedAccessView - A class that gives the compute shader read/write access to a block of GPU memory (such as an ID3D11Buffer)
A note about the Component Object Model (COM) which all of the above data types follow (thus the I for "interface" prefixing all data types):
The Component Object Model is a create-and-destroy type of model. All of these data types are pointers to a created object in memory. Freeing the resources on both the CPU
and the GPU side requires a call to:
object->Release()
The libraries you will need to link in are:
- dxgi.lib
- d3dcompiler.lib
- d3d11.lib NOTE: d3d11.lib is for both DirectX 11.1 and DirectX 11
The header files you will need are:
- d3d11_1.h NOTE:This is for DirectX 11.1, DirectX 11 is d3d11.h
- d3dcompiler.h
To initialize a DirectCompute shader, you basically have to find the device you want to run it on, compile the shader, allocate the memory, and then dispatch the program.
Enumerating Adapters
IDXGIFactory2 *dxgiFactory;
IDXGIAdapter2 *dxgiAdapter;
HRESULT hresult;
int i = 0;
std::
vector<IDXGIAdapter2*> adapterList;
hresult =
CreateDXGIFactory1(
__uuidof(IDXGIFactory2), (
void**)&dxgiFactory);
if (
FAILED(hresult)) {
return false;
}
while (dxgiFactory->
EnumAdapters1(i++, (
IDXGIAdapter1**)&dxgiAdapter) !=
DXGI_ERROR_NOT_FOUND) {
adapterList.
push_back(dxgiAdapter);
}
for (i = 0;i < adapterList.
size();i++) {
adapterList[i]->
Release();
}
dxgiFactory->
Release();
Creating the Device Object
// Create a Direct3D 11 device and immediate context on the first enumerated
// adapter, then obtain the DirectX 11.1 interfaces (ID3D11Device1 /
// ID3D11DeviceContext1) from them via QueryInterface.
ID3D11Device *oldStyleDevice = NULL;
ID3D11DeviceContext *oldStyleContext = NULL;
D3D_FEATURE_LEVEL afl, fl[] = {
    D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1 };

// No adapter was found: indexing adapterList[0] would be out of bounds.
if (adapterList.empty()) {
    return false;
}

hresult = D3D11CreateDevice(
    adapterList[0],
    D3D_DRIVER_TYPE_UNKNOWN,   // required when an explicit adapter is passed
    NULL,
    0,
    fl,
    ARRAYSIZE(fl),
    D3D11_SDK_VERSION,
    &oldStyleDevice,
    &afl,                      // receives the feature level actually granted
    &oldStyleContext
);
if (FAILED(hresult)) {
    return false;
}

ID3D11Device1 *newStyleDevice = NULL;
ID3D11DeviceContext1 *newStyleContext = NULL;

// QueryInterface can fail (the 11.1 interfaces may not be available), so the
// results are checked instead of using possibly-unset pointers afterwards.
hresult = oldStyleDevice->QueryInterface(__uuidof(ID3D11Device1), (void**)&newStyleDevice);
if (SUCCEEDED(hresult)) {
    hresult = oldStyleContext->QueryInterface(__uuidof(ID3D11DeviceContext1), (void**)&newStyleContext);
}

// The 11.0 interface pointers are no longer needed either way.
oldStyleDevice->Release();
oldStyleContext->Release();

if (FAILED(hresult)) {
    if (newStyleDevice) {
        newStyleDevice->Release();
    }
    return false;
}
Compiling the Shader
// Compile the HLSL file "myshader.cs" (entry point "main", profile cs_5_0),
// create the compute shader object from the bytecode, and bind it to the
// compute stage of the device context.
ID3DBlob *CSBlob = NULL, *ErrBlob = NULL;
hresult = D3DCompileFromFile(
    L"myshader.cs",
    NULL,
    NULL,
    "main",
    "cs_5_0",
    0,
    0,
    &CSBlob,
    &ErrBlob
);
if (FAILED(hresult)) {
    // The error blob is only filled in when the compiler produced messages;
    // it can remain NULL (for example when the file cannot be opened), so it
    // must be checked before it is dereferenced.
    if (ErrBlob) {
        OutputDebugStringA((LPCSTR)ErrBlob->GetBufferPointer());
        ErrBlob->Release();
    }
    return false;
}
// A successful compile may still have returned a message blob; free it.
if (ErrBlob) {
    ErrBlob->Release();
    ErrBlob = NULL;
}

ID3D11ComputeShader *computeShader = NULL;
hresult = newStyleDevice->CreateComputeShader(
    CSBlob->GetBufferPointer(), CSBlob->GetBufferSize(), 0, &computeShader);
// The bytecode blob is not used after this point; release it before the
// failure check so it is not leaked on the error path.
CSBlob->Release();
if (FAILED(hresult)) {
    return false;
}

newStyleContext->CSSetShader(computeShader, 0, 0);
// Local reference dropped right after binding, as in the original snippet.
computeShader->Release();
Setting up the Buffers
// Create the three objects the shader's data path needs:
//  - shaderDataBuffer: structured buffer of 4 floats the shader reads/writes
//  - shaderCopyBuffer: staging buffer of the same size with CPU read access
//  - shaderAccessView: unordered access view over shaderDataBuffer, bound to
//    compute-shader UAV slot 0
D3D11_BUFFER_DESC shaderDataBufferDesc;
D3D11_BUFFER_DESC shaderCopyBufferDesc;
D3D11_UNORDERED_ACCESS_VIEW_DESC shaderAccessViewDesc;
ID3D11Buffer *shaderDataBuffer = NULL;
ID3D11Buffer *shaderCopyBuffer = NULL;
ID3D11UnorderedAccessView *shaderAccessView = NULL;

// Size each clear from the variable itself. The view description is a
// different (smaller) struct than D3D11_BUFFER_DESC, so clearing it with
// sizeof(D3D11_BUFFER_DESC) would write past its end.
ZeroMemory(&shaderDataBufferDesc, sizeof(shaderDataBufferDesc));
ZeroMemory(&shaderCopyBufferDesc, sizeof(shaderCopyBufferDesc));
ZeroMemory(&shaderAccessViewDesc, sizeof(shaderAccessViewDesc));

shaderDataBufferDesc.Usage = D3D11_USAGE_DEFAULT;
shaderDataBufferDesc.ByteWidth = sizeof(float) * 4;
shaderDataBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
shaderDataBufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
shaderDataBufferDesc.StructureByteStride = sizeof(float);   // one float per element
hresult = newStyleDevice->CreateBuffer(&shaderDataBufferDesc, 0, &shaderDataBuffer);
if (FAILED(hresult)) {
    return false;
}

// Staging copy: same size and stride, readable by the CPU.
shaderCopyBufferDesc.Usage = D3D11_USAGE_STAGING;
shaderCopyBufferDesc.ByteWidth = shaderDataBufferDesc.ByteWidth;
shaderCopyBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
shaderCopyBufferDesc.StructureByteStride = shaderDataBufferDesc.StructureByteStride;
hresult = newStyleDevice->CreateBuffer(&shaderCopyBufferDesc, 0, &shaderCopyBuffer);
if (FAILED(hresult)) {
    shaderDataBuffer->Release();   // do not leak the buffer created above
    return false;
}

// Format stays DXGI_FORMAT_UNKNOWN; the view covers all 4 elements.
shaderAccessViewDesc.Format = DXGI_FORMAT_UNKNOWN;
shaderAccessViewDesc.Buffer.NumElements = 4;
shaderAccessViewDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
hresult = newStyleDevice->CreateUnorderedAccessView(shaderDataBuffer, &shaderAccessViewDesc, &shaderAccessView);
if (FAILED(hresult)) {
    shaderCopyBuffer->Release();
    shaderDataBuffer->Release();
    return false;
}

newStyleContext->CSSetUnorderedAccessViews(0, 1, &shaderAccessView, 0);
// Local reference dropped right after binding, as in the original snippet.
shaderAccessView->Release();
Writing/Updating the Shader's Input Buffer
// Upload the four input floats from CPU memory into shaderDataBuffer.
float data[] = { 1.0f, 2.0f, 3.0f, 4.0f };
newStyleContext->UpdateSubresource(
    shaderDataBuffer,   // destination resource
    0,                  // subresource index
    NULL,               // no destination box
    data,               // source memory
    sizeof(data),       // row pitch: the four floats
    0);                 // depth pitch
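Making the Shader Do Work
With the shader, the unordered access view, and the input data in place, launch the shader with a call such as newStyleContext->Dispatch(1, 1, 1); a single thread group is enough here because the shader below declares [numthreads(2,2,1)], which gives four threads for the four floats.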
Copy the Shader's Output
// Read the shader's results back: copy shaderDataBuffer into the staging
// buffer, map the staging buffer for reading, and copy the four floats out.
float dataFromShader[4];
D3D11_MAPPED_SUBRESOURCE mappedCopy;

newStyleContext->CopyResource(shaderCopyBuffer, shaderDataBuffer);

hresult = newStyleContext->Map(shaderCopyBuffer, 0, D3D11_MAP_READ, 0, &mappedCopy);
if (!SUCCEEDED(hresult)) {
    return false;
}

// pData points at the mapped contents of the staging buffer.
memcpy(dataFromShader, mappedCopy.pData, sizeof(dataFromShader));

newStyleContext->Unmap(shaderCopyBuffer, 0);
The Actual Compute Shader (in HLSL)
// Read/write buffer of floats bound to UAV register u0.
RWStructuredBuffer<float> Result : register(u0);

// System-value inputs delivered to each compute-shader thread.
struct CSInput {
    uint3 Gid  : SV_GroupID;
    uint3 DTid : SV_DispatchThreadID;
    uint3 GTid : SV_GroupThreadID;
    uint  GI   : SV_GroupIndex;
};

// Each thread squares, in place, the element of Result selected by its
// group index. Thread groups are 2 x 2 x 1.
[numthreads(2, 2, 1)]
void main(in CSInput csvalues) {
    const uint slot = csvalues.GI;
    const float original = Result[slot];
    Result[slot] = original * original;
}
Cleaning Up and Exiting
shaderDataBuffer->Release();
shaderCopyBuffer->Release();
newStyleContext->Release();
newStyleDevice->Release();