Subversion Repository Public Repository

Divide-Framework

This repository has no backups
This repository's network speed is throttled to 100KB/sec

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#include "Headers/GFXDevice.h"

#include "Core/Headers/Kernel.h"
#include "Core/Headers/Application.h"
#include "Core/Headers/ParamHandler.h"
#include "Core/Resources/Headers/ResourceCache.h"
#include "Hardware/Video/Shaders/Headers/ShaderManager.h"
#include "Rendering/PostFX/Headers/PostFX.h"
#include "Rendering/Headers/ForwardPlusRenderer.h"
#include "Rendering/Headers/DeferredShadingRenderer.h"

namespace Divide {

/// Create a display context using the selected API and create all of the primitives needed for frame rendering
/// @param resolution target resolution: forwarded to the rendering API, applied to the window and used to size
///                   the default render targets and the post processing system
/// @param argc argument count, forwarded as-is to the rendering API
/// @param argv argument vector, forwarded as-is to the rendering API
/// @return the error code returned by createAPIInstance() if it differs from NO_ERR, NO_ERR otherwise
ErrorCode GFXDevice::initRenderingApi(const vec2<U16>& resolution, I32 argc, char **argv) {
	ErrorCode hardwareState = createAPIInstance();
	if (hardwareState == NO_ERR) {
		// Initialize the rendering API
		_api->initRenderingApi(resolution, argc, argv);
	} else {
		// The API instance could not be created: abort and hand the error code back to the caller
        return hardwareState;
    }
    // Initialize the shader manager
    ShaderManager::getOrCreateInstance().init();
    // Create an immediate mode shader used for general purpose rendering (e.g. to mimic the fixed function pipeline)
    _imShader = ShaderManager::getInstance().getDefaultShader();
    DIVIDE_ASSERT(_imShader != nullptr, "GFXDevice error: No immediate mode emulation shader available!");
    // Create the post processing system now; it is only initialized later, once the render targets exist
	PostFX::createInstance();
    // Create a shader buffer to store the following info:
    // ViewMatrix, ProjectionMatrix, ViewProjectionMatrix, CameraPositionVec,
    // ViewportRec, zPlanesVec4 and ClipPlanes[MAX_CLIP_PLANES]
    // It should translate to (as seen by OpenGL) a uniform buffer without persistent mapping.
    // (Many small updates with BufferSubData are recommended with the target usage of the buffer)
    _gfxDataBuffer = newSB(false, false);
    _gfxDataBuffer->Create(1, sizeof(GPUBlock)); 
    _gfxDataBuffer->Bind(SHADER_BUFFER_GPU_BLOCK);
    // Every visible node will first update this buffer with required data
    // (WorldMatrix, NormalMatrix, Material properties, Bone count, etc)
    // Due to its potentially huge size, it translates to (as seen by OpenGL) a Shader Storage Buffer that's
    // persistently and coherently mapped
    _nodeBuffer = newSB(true);
    _nodeBuffer->Create(Config::MAX_VISIBLE_NODES, sizeof(NodeData));
    _nodeBuffer->Bind(SHADER_BUFFER_NODE_INFO);
    // Resize our window to the target resolution (usually, the splash screen resolution)
    changeResolution(resolution.width, resolution.height);
    // Create general purpose render state blocks
    // 1) Default state: an unmodified descriptor
    RenderStateBlockDescriptor defaultStateDescriptor;
    _defaultStateBlockHash = getOrCreateStateBlock(defaultStateDescriptor);
    // 2) Default state with a modified Z read/write setup
    RenderStateBlockDescriptor defaultStateDescriptorNoDepth;
    defaultStateDescriptorNoDepth.setZReadWrite(false, true);
    _defaultStateNoDepthHash = getOrCreateStateBlock(defaultStateDescriptorNoDepth);
    // 3) 2D rendering state: same Z read/write setup as above, with culling disabled
    RenderStateBlockDescriptor state2DRenderingDesc;
    state2DRenderingDesc.setCullMode(CULL_MODE_NONE);
    state2DRenderingDesc.setZReadWrite(false, true);
    _state2DRenderingHash = getOrCreateStateBlock(state2DRenderingDesc);
    // 4) Depth only state: all color channel writes disabled and the Z function set to "always"
    RenderStateBlockDescriptor stateDepthOnlyRendering;
    stateDepthOnlyRendering.setColorWrites(false, false, false, false);
    stateDepthOnlyRendering.setZFunc(CMP_FUNC_ALWAYS);
    _stateDepthOnlyRenderingHash = getOrCreateStateBlock(stateDepthOnlyRendering);
    // Block with hash 0 is null, and it's used to force a block update, bypassing state comparison with previous blocks
    _stateBlockMap[0] = nullptr;
    // The general purpose render state blocks are both mandatory and must differ from each other at a state hash level
    DIVIDE_ASSERT(_stateDepthOnlyRenderingHash != _state2DRenderingHash,    
                  "GFXDevice error: Invalid default state hash detected!");
    DIVIDE_ASSERT(_state2DRenderingHash        != _defaultStateNoDepthHash, 
                  "GFXDevice error: Invalid default state hash detected!");
    DIVIDE_ASSERT(_defaultStateNoDepthHash     != _defaultStateBlockHash, 
                  "GFXDevice error: Invalid default state hash detected!");
    // Activate the default render states
    setStateBlock(_defaultStateBlockHash);
    // Our default render targets hold the screen buffer, depth buffer, and a special, on demand,
    // down-sampled version of the depth buffer
    // Screen FB should use MSAA if available
    _renderTarget[RENDER_TARGET_SCREEN]       = newFB(true);
    // The depth buffer should probably be merged into the screen buffer
    _renderTarget[RENDER_TARGET_DEPTH]        = newFB(false);
    // We need to create all of our attachments for the default render targets
    // Start with the screen render target: Try a half float, multisampled buffer (MSAA + HDR rendering if possible)
    TextureDescriptor screenDescriptor(TEXTURE_2D_MS, RGBA16F, FLOAT_16);
    SamplerDescriptor screenSampler;
    screenSampler.setFilters(TEXTURE_FILTER_NEAREST, TEXTURE_FILTER_NEAREST);
    screenSampler.setWrapMode(TEXTURE_CLAMP_TO_EDGE);
    screenSampler.toggleMipMaps(false);
    screenDescriptor.setSampler(screenSampler);
    // Next, create a depth attachment for the screen render target.
    // It must also be multisampled. Use full float precision for long view distances
    SamplerDescriptor depthSampler;
    depthSampler.setFilters(TEXTURE_FILTER_NEAREST);
    depthSampler.setWrapMode(TEXTURE_CLAMP_TO_EDGE);
    depthSampler.toggleMipMaps(false);
    // Use greater or equal depth compare function, but depth comparison is disabled, anyway.
    depthSampler._cmpFunc = CMP_FUNC_GEQUAL; 
    TextureDescriptor depthDescriptor(TEXTURE_2D_MS, DEPTH_COMPONENT32F, FLOAT_32);
    depthDescriptor.setSampler(depthSampler);
    // The depth render target uses a HierarchicalZ buffer to help with occlusion culling
    // Must be as close as possible to the screen's depth buffer
    // NOTE(review): this descriptor requests TEXTURE_2D_MS together with a mipmapped sampler, while the depth
    // target itself is created with newFB(false) - confirm how the framebuffer implementation resolves this
    SamplerDescriptor depthSamplerHiZ;
    depthSamplerHiZ.setFilters(TEXTURE_FILTER_NEAREST_MIPMAP_NEAREST, TEXTURE_FILTER_NEAREST);
    depthSamplerHiZ.setWrapMode(TEXTURE_CLAMP_TO_EDGE);
    depthSamplerHiZ.toggleMipMaps(true);
    TextureDescriptor depthDescriptorHiZ(TEXTURE_2D_MS, DEPTH_COMPONENT32F, FLOAT_32);
    depthDescriptorHiZ.setSampler(depthSamplerHiZ);
    // Add the attachments to the render targets
    // Screen target: one color attachment plus a depth attachment
    _renderTarget[RENDER_TARGET_SCREEN]->AddAttachment(screenDescriptor, TextureDescriptor::Color0);
    _renderTarget[RENDER_TARGET_SCREEN]->AddAttachment(depthDescriptor,  TextureDescriptor::Depth);
    _renderTarget[RENDER_TARGET_SCREEN]->Create(resolution.width, resolution.height);
    // Depth target: a single depth attachment, with color writes turned off
    _renderTarget[RENDER_TARGET_DEPTH]->AddAttachment(depthDescriptorHiZ, TextureDescriptor::Depth);
    _renderTarget[RENDER_TARGET_DEPTH]->toggleColorWrites(false);
    _renderTarget[RENDER_TARGET_DEPTH]->Create(resolution.width, resolution.height);
    // If we enabled anaglyph rendering, we need a second target, identical to the screen target,
    // used to render the scene at an offset
    if(_enableAnaglyph){
        _renderTarget[RENDER_TARGET_ANAGLYPH] = newFB(true);
        _renderTarget[RENDER_TARGET_ANAGLYPH]->AddAttachment(screenDescriptor, TextureDescriptor::Color0);
        _renderTarget[RENDER_TARGET_ANAGLYPH]->AddAttachment(depthDescriptor,  TextureDescriptor::Depth);
        _renderTarget[RENDER_TARGET_ANAGLYPH]->Create(resolution.width, resolution.height);
    }
    // Now that the render targets are ready, we initialize our post processing system
    PostFX::getInstance().init(resolution);
    // We also add a couple of useful cameras used by this class. One for rendering in 2D and one for generating cube maps
	
    Application::getInstance().getKernel()->getCameraMgr().addNewCamera("2DRenderCamera", _2DCamera);
	Application::getInstance().getKernel()->getCameraMgr().addNewCamera("_gfxCubeCamera", _cubeCamera);
    // Initialize our HierarchicalZ construction shader (takes a depth attachment and down-samples it for every mip level)
    _HIZConstructProgram = CreateResource<ShaderProgram>(ResourceDescriptor("HiZConstruct"));
    _HIZConstructProgram->UniformTexture("LastMip", 0);
    // Store our target z distances (the same parameters are read again on every idle() call)
    _gpuBlock._ZPlanesCombined.z = ParamHandler::getInstance().getParam<F32>("rendering.zNear");
    _gpuBlock._ZPlanesCombined.w = ParamHandler::getInstance().getParam<F32>("rendering.zFar");
    // Create a separate loading thread that shares resources with the main rendering context
    // (the thread object is joined and deleted in closeRenderingApi())
    _loaderThread = MemoryManager_NEW std::thread(&GFXDevice::createLoaderThread, this);
    // Register a 2D function used for previewing the depth buffer (debug builds only).
#   ifdef _DEBUG
        add2DRenderFunction(DELEGATE_BIND(&GFXDevice::previewDepthBuffer, this), 0);
#   endif
    // We start off with a forward plus renderer
    setRenderer(MemoryManager_NEW ForwardPlusRenderer());
    // The "rendering.previewDepthBuffer" parameter starts out as false
	ParamHandler::getInstance().setParam<bool>("rendering.previewDepthBuffer", false);
    // Everything is ready from the rendering point of view
    return NO_ERR;
}

/// Revert everything that was set up in initRenderingApi().
/// Resources are released first, then the rendering API is closed, the loader thread is joined and deleted,
/// and finally the API singleton matching _apiId is destroyed and the cached _api pointer is cleared.
void GFXDevice::closeRenderingApi() {
    // Delete the internal shader
    RemoveResource(_HIZConstructProgram);
    // Destroy our post processing system
    PRINT_FN(Locale::get("STOP_POST_FX"));
    PostFX::destroyInstance();
    // Delete the renderer implementation
    PRINT_FN(Locale::get("CLOSING_RENDERER"));
    MemoryManager::DELETE(_renderer);
    // Delete our default render state blocks
    MemoryManager::DELETE_HASHMAP(_stateBlockMap);
    // Destroy all of the immediate mode emulation primitives created during runtime
    MemoryManager::DELETE_VECTOR(_imInterfaces);
    // Destroy all rendering passes and rendering bins
    RenderPassManager::destroyInstance();
    // Delete all of our rendering targets
    for (Framebuffer*& renderTarget : _renderTarget) {
        MemoryManager::DELETE(renderTarget);
    }
    // Delete our shader buffers
    MemoryManager::DELETE(_gfxDataBuffer);
    MemoryManager::DELETE(_nodeBuffer);
    // Close the shader manager
    ShaderManager::getInstance().destroy();
    // Close the rendering API
    _api->closeRenderingApi();
    // Wait for the loading thread to terminate
    _loaderThread->join();
    // And delete it
    MemoryManager::DELETE(_loaderThread);

    // Destroy the API singleton that backs _api
    switch (_apiId) {
        case RenderAPI::OpenGL:
        case RenderAPI::OpenGLES: {
            GL_API::destroyInstance();
        } break;
        case RenderAPI::Direct3D: {
            DX_API::destroyInstance();
        } break;
        case RenderAPI::Mantle:
        case RenderAPI::None:
        default: {
            // No API singleton to destroy for these back-ends
        } break;
    }

    // The instance _api pointed to has just been destroyed: clear the pointer so it does not dangle and so that
    // the "_api == nullptr" precondition asserted by createAPIInstance() holds again after shutdown
    _api = nullptr;
}

/// Idle-time processing: after a swap buffer call the CPU may be waiting on the GPU, so use that time to
/// refresh the cached z planes and to forward the idle call to the post processing system and the shader manager
void GFXDevice::idle() {
    // Re-read the near/far plane distances from the parameter store on every call
    auto& params = ParamHandler::getInstance();
    _gpuBlock._ZPlanesCombined.z = params.getParam<F32>("rendering.zNear");
    _gpuBlock._ZPlanesCombined.w = params.getParam<F32>("rendering.zFar");

    // Post processing first ...
    PostFX::getInstance().idle();
    // ... then the shader manager
    ShaderManager::getInstance().idle();
}

/// Start a new frame: forward the call to the active rendering API, then re-apply the default render state block
void GFXDevice::beginFrame() {
    // API specific frame start
    _api->beginFrame();

    // Every frame begins with the default state block active
    setStateBlock(_defaultStateBlockHash);
}

void GFXDevice::endFrame() { 
    // Max number of frames before an unused primitive is deleted (default: 180 - 3 seconds at 60 fps)
    static const I32 IM_MAX_FRAMES_ZOMBIE_COUNT = 180;

    if (Application::getInstance().mainLoopActive()) {
        // Render all 2D debug info and call API specific flush function
        toggle2D(true);
		for (std::pair<U32, DELEGATE_CBK<> >& callbackFunction : _2dRenderQueue) {
            callbackFunction.second();
        }
        toggle2D(false);

        //Remove dead primitives in 3 steps (or we could automate this with shared_ptr?):
        //1) Partition the vector in 2 parts: valid objects first, zombie objects second
        vectorImpl<IMPrimitive* >::iterator zombie = std::partition(_imInterfaces.begin(), _imInterfaces.end(),
                                                                    [](IMPrimitive* const priv){
                                                                        if(!priv->_canZombify) return true;
                                                                        return priv->zombieCounter() < IM_MAX_FRAMES_ZOMBIE_COUNT;
                                                                    });
        //2) For every zombie object, free the memory it's using
        for ( vectorImpl<IMPrimitive *>::iterator i = zombie ; i != _imInterfaces.end(); ++i ) {
            MemoryManager::DELETE( *i );
        }
        //3) Remove all the zombie objects once the memory is freed
        _imInterfaces.erase(zombie, _imInterfaces.end());
    
        FRAME_COUNT++;
        FRAME_DRAW_CALLS_PREV = FRAME_DRAW_CALLS;
        FRAME_DRAW_CALLS = 0;
    }

    _api->endFrame();  
}

/// Access the currently active renderer
/// @return the renderer stored by setRenderer(); asserts that one has been set
Renderer* GFXDevice::getRenderer() const {
    // Asking for the renderer before one exists is treated as a programming error
    DIVIDE_ASSERT(_renderer != nullptr,
                  "GFXDevice error: Renderer requested but not created!");

    return _renderer;
}

/// Replace the active renderer
/// @param renderer the new renderer; must not be null
void GFXDevice::setRenderer(Renderer* const renderer) {
    // A null renderer is never a valid replacement
    DIVIDE_ASSERT(renderer != nullptr,
                  "GFXDevice error: Tried to create an invalid renderer!");

    // NOTE(review): SAFE_UPDATE presumably releases the previous renderer before storing the new one -
    // confirm against MemoryManager
    MemoryManager::SAFE_UPDATE(_renderer, renderer);
}

/// Create the rendering API wrapper that matches _apiId and store it in _api
/// @return NO_ERR for OpenGL / OpenGLES, GFX_NOT_SUPPORTED for Direct3D, Mantle and None,
///         GFX_NON_SPECIFIED for any other value
ErrorCode GFXDevice::createAPIInstance() {
    DIVIDE_ASSERT(_api == nullptr, "GFXDevice error: initRenderingAPI called twice!");

    ErrorCode result = NO_ERR;
    switch (_apiId) {
        case RenderAPI::OpenGL:
        case RenderAPI::OpenGLES:
            _api = &GL_API::getOrCreateInstance();
            break;

        case RenderAPI::Direct3D:
            // The Direct3D instance is still created, but the API is reported as not supported
            _api = &DX_API::getOrCreateInstance();
            result = GFX_NOT_SUPPORTED;
            break;

        case RenderAPI::Mantle:
        case RenderAPI::None:
            result = GFX_NOT_SUPPORTED;
            break;

        default:
            result = GFX_NON_SPECIFIED;
            break;
    }

    // Every failing path logs the same error message exactly once
    if (result != NO_ERR) {
        ERROR_FN(Locale::get("ERROR_GFX_DEVICE_API"));
    }

    return result;
}

};

Commits for Divide-Framework/trunk/Source Code/Hardware/Video/GFXDeviceState.cpp

Diff revisions: vs.
Revision Author Committed Message
331 Diff Diff IonutCava picture IonutCava Sat 06 Dec, 2014 20:53:45 +0000

[Ionut]
- Limited line length to 132 characters to improve readability and diff-comparisons
- Refactored memory allocation/deallocation functions
- Fixed a few compatibility issues with HashMap.h
- Fixed a bug in GPU Skinning shaders (cast a float to int)

330 Diff Diff IonutCava picture IonutCava Fri 10 Oct, 2014 17:19:11 +0000

[Ionut]
- New rendering algorithm :
— Perform second pass for visible nodes gathering all render commands and uploading them to an indirect draw buffer
— Render geometry with indirect draw commands
— Removed per-node range binding of the node buffer. Instead, bind the entire buffer once and index data in shaders
— Use “baseInstance” parameter from IndirectDrawCommand as a node buffer index
— Prefer DSA extensions for updating buffer data where applicable
- Moved all rendering specific code from SceneGraphNode to a new RenderingComponent
- Optimized “toString” utility function

326 Diff Diff IonutCava picture IonutCava Tue 30 Sep, 2014 21:11:32 +0000

[Ionut]
- Fixed more memory leaks
- Simplified Task interface and timing System
- Improved compatibility between Boost, STL and EASTL with random combinations of vectors, strings and maps
- Simplified Light class by removing the “slot” member
- Random optimizations

325 Diff Diff IonutCava picture IonutCava Wed 17 Sep, 2014 20:06:13 +0000

[Ionut]
- Reworked transform system:
— Parent transform chain system moved from Transform class to the PhysicsComponent
— PhysicsComponent now returns all transform values needed both global and local (still needs heavy optimization and caching)

- Reworked SceneGraph node management:
— Proper ref counting of SceneNodes and proper removing of resource (ResourceCache is now empty on destruction, as it should be)
— Removed parent transform tracking as that’s the PhysicsComponent’s responsibility
— Only nodes loaded via the ResourceCache are allowed to be added to the SceneGraph (added proper loaders for Impostors, Sky, etc)

- Optimized some of the math classes (mat4, mat3)

324 Diff Diff IonutCava picture IonutCava Sun 14 Sep, 2014 14:01:39 +0000

[Ionut]
- New memory management system (no custom allocators in this commit)
— Memory tracer to detect memory leaks on shutdown (from: https://www.relisoft.com/book/tech/9new.html)
— SAFE_DELETE_XYZ are now inline template functions not macros
— ASSERTION system now supports logging and message boxes and continue on error configurable for each build type
— Fixed a lot of memory leaks detected by the new system (some still remain)

- Added SRGB support (part 1)
— Albedo textures are loaded as SRGB (make sure resources are SRGB)
— Shaders output gamma corrected colors
— Moved terrain textures back to 2 array system (albedo and normal maps, albedo maps using a SRGB sampler)

- Sky is now loaded via the ResourceCache system
- Added include safeguards in shader atoms

323 Diff Diff IonutCava picture IonutCava Thu 11 Sep, 2014 20:58:50 +0000

[Ionut]
- Reworked GFXDevice API initialization system
- Added a separate map for boolean values in ParamHandler
- More cleanups in GFXDevice class
- Fixed an infinite loop issue in Task class

322 Diff Diff IonutCava picture IonutCava Sat 06 Sep, 2014 20:33:47 +0000

[Ionut]
- Refactored most of Boost related code to be C++11 based
— Boost is only used for ThreadPool, Mutex handling, XML parsing and networking (ASIO)
— Function binds, threads, regex, NonCopyable base, Hashing algorithms and more are now using C++11 libraries
- Replaced all FOR_EACH calls with standard, range, “for” calls

318 Diff Diff IonutCava picture IonutCava Sat 30 Aug, 2014 17:35:53 +0000

[Ionut]
- Wrapped the entire code in a “Divide” namespace
- VertexBuffers now call “shrink_to_fit” on all internal data storage
- Improved some vector performance by preferring “emplace_back” instead of “push_back” + proper usage of reserve / resize
- Wrapped OIS specific types and classes in Divide::Input namespace
- Added the messageBox.layout file (forgot it in the previous few commits)

314 Diff Diff IonutCava picture IonutCava Sun 24 Aug, 2014 19:54:33 +0000

[Ionut]
- More AI work
- Added some needed audio dlls

301 IonutCava picture IonutCava Tue 05 Aug, 2014 20:55:30 +0000

[Ionut]
- Code cleanups and comments (ShaderManager, ShaderProgram, Texture)
- Fixed some singleton destruction order between ResourceCache and ShaderManager to properly unload remaining shader programs