2
« on: July 03, 2011, 10:45:34 am »
Hi,
I've been interested in making little 2D games for several years now, but never finished anything too elaborate. I was pretty excited about SFML when I heard of it, because it's an easy-to-use, cross-platform, hardware-accelerated 2D graphics library - just what I need to get on with making games, not fiddling around with perfecting engine code. After using it a bit, though, I was disappointed with the performance. I am coming from a background in DirectX9, where you have access to the ID3DXSprite interface, so I wrote two little toy programs to benchmark SFML against D3DXSprite.
The task was to build a program that opens a 1024x768 windowed application and fills the screen with a 32x32 tiled image. It has to make 768 Draw calls per frame (32 columns x 24 rows).
D3DXSprite version:
#include <string>
#include <boost/lexical_cast.hpp>
#include <windows.h>
#include <d3d9.h>
#include <d3dx9.h>
#pragma comment (lib,"d3d9.lib")
#ifdef _DEBUG
#pragma comment (lib,"d3dx9d.lib")
#else
#pragma comment (lib,"d3dx9.lib")
#endif
// Forward declarations of the helpers defined later in this file.
// Registers the window class; returns false if registration fails.
bool registerClass();
// Creates the benchmark window and stores its handle in the global hwnd.
bool createWindow();
// Creates the Direct3D objects, the sprite interface and the tile texture.
bool dxSetup();
// Releases whatever dxSetup managed to create.
void dxCleanup ();
// Pumps window messages and renders frames until WM_QUIT, then fills in FPS.
void msgLoop();
// Renders one frame of tiles and increments the frame counter.
void drawFrame();
// Window procedure registered with the window class.
LRESULT CALLBACK WindowProcedure (HWND,UINT,WPARAM,LPARAM);
// Global state shared by the helper functions of the D3DXSprite benchmark.
// Window class name, used by registerClass and createWindow.
const std::string winClassname = "WinClass";
// Requested window dimensions in pixels; also used as the back-buffer size
// in dxSetup and as the tiling bounds in drawFrame.
unsigned int width = 1024;
unsigned int height = 768;
// Spacing between tiles in pixels; drawFrame places one sprite every
// texture_size pixels in both directions.
unsigned int texture_size = 32;
// Handle of the benchmark window, assigned by createWindow.
HWND hwnd = NULL;
// Direct3D objects created in dxSetup and released in dxCleanup.
IDirect3D9* d3d = NULL;
IDirect3DDevice9* device = NULL;
ID3DXSprite* d3d_sprite = NULL;
// Presentation parameters, filled in by dxSetup before device creation.
D3DPRESENT_PARAMETERS d3dPresentParams;
// Set by dxSetup to a scaling matrix with factors of 1.0 on every axis;
// not read anywhere else in the visible code.
D3DXMATRIX normal;
// Tile texture loaded from "img.png" in dxSetup.
IDirect3DTexture9* texture = NULL;
// Number of frames rendered so far; incremented at the end of drawFrame.
unsigned int frames = 0;
// Result text shown when the program ends; overwritten by msgLoop.
std::string FPS = "not calculated";
// Entry point of the D3DXSprite benchmark.
//
// Registers the window class, creates and shows the window, initialises
// Direct3D and then runs the message/render loop until the user quits.
// Returns 1 if any setup step reports failure, 0 otherwise. Exceptions
// derived from std::exception are reported in a message box, after which
// the normal shutdown path (cleanup + results box) still runs.
int WINAPI WinMain (HINSTANCE hThisInst,HINSTANCE hPrevInst,LPSTR lpszArgs,int nWinMode)
{
    try
    {
        // Register the window class
        if (!registerClass())
        {
            return 1;
        }
        // Create the window; nothing Direct3D-related exists yet, so a plain
        // return is enough on failure
        if (!createWindow())
        {
            return 1;
        }
        ShowWindow(hwnd,SW_SHOW);
        if (!dxSetup())
        {
            // dxSetup can fail after it has already created some of the
            // Direct3D objects; release them instead of leaking them
            dxCleanup();
            return 1;
        }
        // Enter the message loop
        msgLoop();
    }
    catch(const std::exception& e)
    {
        MessageBox(hwnd,e.what(),"exception",0);
    }
    // Message loop terminated (or an exception was reported): clean up and
    // show the benchmark result
    dxCleanup();
    ShowWindow(hwnd,SW_HIDE);
    MessageBox(NULL,FPS.c_str(),"Benchmark Results",0);
    return 0;
}
// Window procedure for the benchmark window.
//
// Escape (WM_KEYDOWN with VK_ESCAPE) and WM_DESTROY post a quit message,
// WM_CLOSE destroys the window, and the remaining keyboard/mouse messages
// listed below are consumed without any action. Every handled message
// returns 0; anything else is forwarded to DefWindowProc.
LRESULT CALLBACK WindowProcedure (HWND hwnd,UINT message,WPARAM wParam,LPARAM lParam)
{
    switch (message)
    {
    case WM_KEYDOWN:
        // Escape ends the benchmark; other keys are swallowed
        if (wParam == VK_ESCAPE)
        {
            PostQuitMessage(0);
        }
        return 0;

    case WM_KEYUP:
    case WM_MOUSEMOVE:
    case WM_LBUTTONDOWN:
    case WM_RBUTTONDOWN:
    case WM_LBUTTONUP:
    case WM_RBUTTONUP:
        // Input messages the benchmark deliberately ignores
        return 0;

    case WM_CLOSE:
        DestroyWindow(hwnd);
        return 0;

    case WM_DESTROY:
        PostQuitMessage(0);
        return 0;
    }
    return DefWindowProc(hwnd, message, wParam, lParam);
}
bool registerClass()
{
WNDCLASSEX wcl;
//define a window class
wcl.cbSize = sizeof(WNDCLASSEX);
wcl.hInstance = GetModuleHandle(NULL);
wcl.lpszClassName = winClassname.c_str();
wcl.lpfnWndProc = WindowProcedure;
wcl.style = 0;
wcl.hIcon = NULL;
wcl.hIconSm = NULL;
wcl.hCursor = LoadCursor(NULL, IDC_ARROW);
wcl.lpszMenuName = NULL;
wcl.cbClsExtra = 0;
wcl.cbWndExtra = 0;
wcl.hbrBackground = (HBRUSH) GetStockObject(BLACK_BRUSH);
//register a window
if (!RegisterClassEx(&wcl)) return false;
else return true;
}
bool createWindow()
{
hwnd = CreateWindow(winClassname.c_str(),
"DirectX Benchmark",
WS_OVERLAPPEDWINDOW,
CW_USEDEFAULT,
CW_USEDEFAULT,
width,
height,
NULL,
NULL,
GetModuleHandle(NULL),
NULL);
if (!hwnd) return false;
else
{
return true;
}
}
// Creates every Direct3D resource the benchmark needs: the IDirect3D9
// object, a windowed HAL device without vsync, the ID3DXSprite interface
// and the tile texture loaded from "img.png".
//
// Returns false when the Direct3D object, the device or the sprite
// interface cannot be created. Throws std::exception when the texture
// cannot be loaded. Objects created before a failure stay in their globals
// so that dxCleanup can release them.
bool dxSetup()
{
    HRESULT hr = D3D_OK;
    d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL)
    {
        // Direct3DCreate9 returns NULL on failure; calling CreateDevice
        // through it would dereference a null pointer
        return false;
    }
    D3DXMatrixScaling(&normal,1.0f,1.0f,1.0f);

    ZeroMemory(&d3dPresentParams, sizeof(d3dPresentParams));
    d3dPresentParams.Windowed = TRUE;
    d3dPresentParams.SwapEffect = D3DSWAPEFFECT_DISCARD;
    d3dPresentParams.hDeviceWindow = hwnd;
    d3dPresentParams.BackBufferFormat = D3DFMT_A8R8G8B8;
    d3dPresentParams.BackBufferWidth = width;
    d3dPresentParams.BackBufferHeight = height;
    // Present immediately (no vsync) so the frame rate is not capped
    d3dPresentParams.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;

    hr = d3d->CreateDevice(D3DADAPTER_DEFAULT,
                           D3DDEVTYPE_HAL,
                           hwnd,
                           D3DCREATE_HARDWARE_VERTEXPROCESSING,
                           &d3dPresentParams,
                           &device);
    if (FAILED(hr))
    {
        return false;
    }

    device->SetFVF(D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_TEX1);
    device->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);          // turn on the color blending
    device->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);     // set source factor
    device->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA); // set dest factor
    device->SetRenderState(D3DRS_BLENDOP, D3DBLENDOP_ADD);         // set the operation

    // create the sprite object
    hr = D3DXCreateSprite(device,&d3d_sprite);
    if (FAILED(hr))
    {
        return false;
    }

    hr = D3DXCreateTextureFromFileEx(device,
                                     "img.png",        // filename
                                     D3DX_DEFAULT,     // width
                                     D3DX_DEFAULT,     // height
                                     D3DX_DEFAULT,     // mip mapping
                                     0,                // usage
                                     D3DFMT_A8R8G8B8,
                                     D3DPOOL_MANAGED,
                                     D3DX_DEFAULT,     // filtering
                                     D3DX_DEFAULT,     // mip filtering
                                     0,                // colorkey (disabled)
                                     NULL,             // image info struct
                                     NULL,             // palette
                                     &texture);
    if (FAILED(hr) || texture == NULL)
    {
        throw std::exception("createtexture failed");
    }
    return true;
}
// Releases the Direct3D objects held in the globals.
//
// Each pointer is reset to NULL after its Release call, so the function can
// be called when setup only partially succeeded and can be called more than
// once without releasing an interface twice.
void dxCleanup ()
{
    if (d3d_sprite != NULL)
    {
        d3d_sprite->Release();
        d3d_sprite = NULL;
    }
    if (texture != NULL)
    {
        texture->Release();
        texture = NULL;
    }
    if (device != NULL)
    {
        device->Release();
        device = NULL;
    }
    if (d3d != NULL)
    {
        d3d->Release();
        d3d = NULL;
    }
}
// Runs the benchmark: pumps window messages and renders frames until
// WM_QUIT arrives, then stores the elapsed time and average frame rate in
// the global FPS string.
//
// All pending messages are drained before each frame. Handling only a
// single message per frame lets the queue back up whenever messages arrive
// faster than frames are rendered.
void msgLoop()
{
    LARGE_INTEGER frequency;
    LARGE_INTEGER start_time;
    LARGE_INTEGER end_time;
    frequency.QuadPart = 0;
    start_time.QuadPart = 0;
    end_time.QuadPart = 0;

    QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&start_time);

    MSG msg;
    bool running = true;
    while (running)
    {
        while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
        {
            if (msg.message == WM_QUIT)
            {
                running = false; // exit the message loop
                break;
            }
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
        // No frame is rendered once WM_QUIT has been seen
        if (running)
        {
            drawFrame();
        }
    }

    QueryPerformanceCounter(&end_time);
    double seconds = static_cast<double>(end_time.QuadPart - start_time.QuadPart) / frequency.QuadPart;
    FPS = "Runtime: "+boost::lexical_cast<std::string>(seconds)+" sec\nFPS: "+boost::lexical_cast<std::string>(frames / seconds)+" frames/sec";
}
void drawFrame()
{
device->Clear(0,NULL,D3DCLEAR_TARGET,D3DCOLOR_XRGB(255,255,255),1.0f,0);
device->BeginScene();
d3d_sprite->Begin(D3DXSPRITE_ALPHABLEND);
D3DXVECTOR3 pos;
pos.z = 0.f;
for (unsigned int y=0; y<height/texture_size; ++y)
{
for (unsigned int x=0; x<width/texture_size; ++x)
{
pos.x = static_cast<float>(x*texture_size);
pos.y = static_cast<float>(y*texture_size);
HRESULT hr = d3d_sprite->Draw(texture,NULL,NULL,&pos,D3DCOLOR_ARGB(255,255,255,255));
if (FAILED(hr))
{
throw std::exception("d3d9 sprite object draw call failed");
}
}
}
d3d_sprite->End();
device->EndScene();
device->Present(NULL,NULL,NULL,NULL);
++frames;
}
SFML2 version
#include <string>
#include <boost/lexical_cast.hpp>
// Window dimensions in pixels, passed to sf::VideoMode and used as the
// tiling bounds in the render loop.
unsigned int width = 1024;
unsigned int height = 768;
// Spacing between tiles in pixels; one sprite is drawn every texture_size
// pixels in both directions.
unsigned int texture_size = 32;
// Result text shown in the message box when the program ends.
std::string FPS = "not calculated";
// Number of frames rendered; incremented once per loop iteration in WinMain.
unsigned int frames = 0;
#include <SFML/Graphics.hpp>
// Entry point of the SFML2 benchmark.
//
// Opens a width x height render window, loads "img.png" into a sprite and
// draws that sprite once per tile position every frame until the window is
// closed or Escape is pressed. The elapsed time and average frame rate are
// then shown in a message box. Returns 1 if the image cannot be loaded,
// 0 otherwise.
int WINAPI WinMain (HINSTANCE hThisInst,HINSTANCE hPrevInst,LPSTR lpszArgs,int nWinMode)
{
    sf::RenderWindow window(sf::VideoMode(width, height), "SFML window");
    sf::Image image;
    if (!image.LoadFromFile("img.png"))
    {
        return 1;
    }
    sf::Sprite sprite(image);

    LARGE_INTEGER frequency;
    LARGE_INTEGER start_time;
    LARGE_INTEGER end_time;
    frequency.QuadPart = 0;
    start_time.QuadPart = 0;
    end_time.QuadPart = 0;
    QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&start_time);

    // Number of tiles in each direction
    const unsigned int rows = height/texture_size;
    const unsigned int columns = width/texture_size;

    while (window.IsOpened())
    {
        sf::Event e;
        while (window.GetEvent(e))
        {
            const bool close_requested = (e.Type == sf::Event::Closed);
            const bool escape_pressed = (e.Type == sf::Event::KeyPressed) && (e.Key.Code == sf::Key::Escape);
            if (close_requested || escape_pressed)
            {
                window.Close();
            }
        }
        // Do not render to, or count a frame for, a window that has just
        // been closed
        if (!window.IsOpened())
        {
            break;
        }

        window.Clear();
        for (unsigned int row = 0; row < rows; ++row)
        {
            for (unsigned int column = 0; column < columns; ++column)
            {
                sprite.SetPosition(static_cast<float>(column*texture_size),static_cast<float>(row*texture_size));
                window.Draw(sprite);
            }
        }
        window.Display();
        ++frames;
    }

    QueryPerformanceCounter(&end_time);
    double seconds = static_cast<double>(end_time.QuadPart - start_time.QuadPart) / frequency.QuadPart;
    FPS = "Runtime: "+boost::lexical_cast<std::string>(seconds)+" sec\nFPS: "+boost::lexical_cast<std::string>(frames / seconds)+" frames/sec";
    MessageBox(NULL,FPS.c_str(),"Benchmark Results",0);
    return 0;
}
What I found with these two benchmarks running on my video card (NVidia 9800GTX) was that the D3DXSprite version pushes about 3500 frames per second, while the SFML2 version can only do around 500. Even more worrying, when compiled in Visual Studio 2010's Debug mode, the SFML2 version can only manage about 25 frames per second, while a debug build has no appreciable effect on the D3DXSprite version, which still manages around 3500.
I'm looking for an explanation of why D3DXSprite is so much faster in this case. Both of these programs are calling down to the same hardware, and it's not like there are speed differences between OpenGL and DirectX to pin this on. I am assuming that the sprite batching in SFML is really naive, and that it's actually making several API draw calls where the D3DXSprite object is smart enough to batch these down to one call.
I know it's a bit of a jerk move to register on your forum just to have a go at your hard work. I think SFML is really good. The Direct3DXSprite version above is 300 lines of code compared to 60 for the SFML version. Look at how much work SFML saved me! And the SFML version is fundamentally portable, while the D3DXSprite version is not. I want to use SFML, but I just can't justify the performance hit.
Any comments on this issue?