I am trying to write a compute shader in OpenGL to run a skeletonization algorithm. I tested the algorithm in a CPU-only version, and it works correctly there. However, I am having trouble porting it to compute shader code.
The problem is that no matter how many times I dispatch the compute shader, the result never changes after the first invocation. In fact, if I take out the check at the end of the while loop, the program never terminates.
I have two buffers that I use for input and output. I try to use glBindBufferBase() in the main do-while loop to swap the two (the output of the last round becomes the input for the current round). See lines 270-381 in main.cpp. This way I do not have to copy data back and forth between the CPU and the GPU several times.
So my questions are:
1) Can I do this trick with glBindBufferBase, swapping the two buffers so that I can process the data several times without transferring it back to the CPU? When I tested this on a smaller problem (just adding short arrays), it worked.
2) If the trick is ok, where am I mistaken?
Note: this code requires a 640 x 400 .pgm file (black and white image) called "test.pgm". You can create one in GIMP, but be sure to save it as binary (raw), not ASCII.
This code is compiled with the following flags
g++ -g pgm.cpp main.cpp -lglut -lGLU -lGL -lm -lGLEW -o test
Also, excuse me for using C++ with C-style tricks. I spend more time in C than in C++.
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <math.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <GL/glew.h>
#include <GL/glut.h>
#include "skeletonize.hpp"
#include "PGM.hpp"
// GL object names declared at file scope. programID and output_image are not
// referenced by any function visible in this file.
GLuint programID;
GLuint output_image;
// Indices into buffers[]: the two ping-pong image buffers and the command buffer.
#define IMG_0 0
#define IMG_1 1
#define CMD 2
#define NUM_BUFS 3
// Word offsets inside the command buffer (mirrored by WIDTH/HEIGHT/CMD/RESPONSE
// in compute.shader).
#define CMD_BUF_WIDTH 0
#define CMD_BUF_HEIGHT 1
#define CMD_BUF_CMD 2
#define CMD_BUF_RESPONSE 3
#define CMD_BUF_LEN 4
// Command codes written to cmd[CMD_BUF_CMD]; values must match the shader's
// CMD_* defines (CMD_EXPAND corresponds to the shader's CMD_UNUSED slot).
#define CMD_EXPAND 1
#define CMD_THIN_N 2
#define CMD_THIN_S 3
#define CMD_THIN_E 4
#define CMD_THIN_W 5
#define CMD_NORMALIZE 6
#define CMD_REGULARIZE 7
// Values of cmd[CMD_BUF_RESPONSE]: the host writes INITIALIZED before a pass,
// the shader writes NOT_FINISHED when it removes a pixel.
#define INITIALIZED 0
#define NOT_FINISHED 1
// Compute program and the shader-storage buffers it operates on.
GLuint computeProgram;
GLuint buffers[NUM_BUFS];
// Text of compute.shader as returned by LoadSource().
static GLchar* computeSource;
// Render program (vertex + fragment) and the textures used to display the result.
GLuint shaderProgram;
GLuint textures[2];
/*
 * Reads the whole file at pFile into a freshly allocated, NUL-terminated
 * buffer.
 *
 * pFile:   path of the file to load.
 * Returns: a buffer allocated with new[] (the caller releases it with
 *          delete[]), or NULL after printing a diagnostic if the file cannot
 *          be stat'ed, opened or read.
 *
 * The descriptor is closed on every path, and short reads are retried so the
 * terminator is always placed directly after the bytes actually read.
 */
GLchar* LoadSource(const char* pFile)
{
    struct stat buf;
    if (stat(pFile, &buf) == -1)
    {
        printf("Error opening file\n");
        printf("Error: %s\n", strerror(errno));
        return NULL;
    }

    int fd = open(pFile, O_RDONLY);
    if (fd == -1)
    {
        printf("Error opening file. Error: %s\n", strerror(errno));
        return NULL;
    }

    const size_t wanted = (size_t)buf.st_size;
    GLchar *source = new GLchar[wanted + 1];
    size_t total = 0;

    while (total < wanted)
    {
        ssize_t got = read(fd, source + total, wanted - total);
        if (got == -1)
        {
            if (errno == EINTR)
            {
                continue; /* interrupted before any data arrived: retry */
            }
            /* Report before close() gets a chance to overwrite errno. */
            printf("Error reading file. Error: %s\n", strerror(errno));
            delete[] source;
            close(fd);
            return NULL;
        }
        if (got == 0)
        {
            break; /* file is shorter than stat() reported */
        }
        total += (size_t)got;
    }

    close(fd);
    source[total] = '\0';
    return source;
}
// Vertex shader for the display quad: forwards the texture coordinate and
// emits the 2D position unchanged at z = 0.
const GLchar* vertexSource =
"#version 450 core\n"
"in vec2 position;"
"in vec2 texcoord;"
"out vec2 Texcoord;"
"void main()"
"{"
" Texcoord = texcoord;"
" gl_Position = vec4(position, 0.0, 1.0);"
"}";
// Fragment shader: samples texData and writes its red channel into the green
// channel of the output colour.
const GLchar* fragmentSource =
"#version 450 core\n"
"in vec2 Texcoord;"
"out vec4 outColor;"
"uniform sampler2D texData;"
"void main()"
"{"
" vec4 imColor = texture(texData, Texcoord);"
" outColor = vec4(0.0, imColor.r, 0.0, 1.0);"
"}";
void checkError(int line)
{
GLint err;
do
{
err = glGetError();
switch (err)
{
case GL_NO_ERROR:
break;
case GL_INVALID_ENUM:
printf("%d: Invalid enum!\n", line);
break;
case GL_INVALID_VALUE:
printf("%d: Invalid value\n", line);
break;
case GL_INVALID_OPERATION:
printf("%d: Invalid operation\n", line);
break;
case GL_INVALID_FRAMEBUFFER_OPERATION:
printf("%d: Invalid framebuffer operation\n", line);
break;
case GL_OUT_OF_MEMORY:
printf("%d: Out of memory\n", line);
break;
default:
printf("%d: glGetError default case. Should not happen!\n", line);
}
} while (err != GL_NO_ERROR);
}
// GLUT display callback: clears the window to blue, draws the six-index
// element array currently bound (set up in initGL) and presents the frame.
void display()
{
glClearColor(0.0f, 0.0f, 1.0f, 0.0f);
glClear(GL_COLOR_BUFFER_BIT);
// Indices come from the bound element array buffer, hence the 0 offset.
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
glFlush();
glutSwapBuffers();
}
/*
 * GLUT reshape callback: makes the viewport cover the whole window.
 *
 * width, height: new window size in pixels.
 */
void reshape(int width,int height)
{
    /* The aspect ratio computed by the earlier version was never used. */
    glViewport(0, 0, width, height);
}
void runComputeProgram(uint32_t *data, uint32_t *data2)
{
int width = 640;
int height = 400;
uint32_t *ptr;
uint32_t cmd[CMD_BUF_LEN];
computeSource = LoadSource("compute.shader");
if (computeSource == NULL)
{
return;
}
GLuint computeShader = glCreateShader(GL_COMPUTE_SHADER);
glShaderSource(computeShader, 1, &computeSource, NULL);
glCompileShader(computeShader);
computeProgram = glCreateProgram();
glAttachShader(computeProgram, computeShader);
glLinkProgram(computeProgram);
GLint status;
glGetProgramiv(computeProgram, GL_LINK_STATUS, &status);
if (status == GL_TRUE)
{
printf("link good\n");
}
else
{
printf("link bad\n");
GLchar log[4096];
GLsizei len;
glGetProgramInfoLog(computeProgram, 4096, &len, log);
printf("%s\n", log);
return;
}
cmd[CMD_BUF_CMD] = CMD_NORMALIZE;
cmd[CMD_BUF_WIDTH] = width;
cmd[CMD_BUF_HEIGHT] = height;
glGenBuffers(NUM_BUFS, buffers);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[CMD]);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(cmd), cmd, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffers[IMG_0]);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * width * height, data, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_1]);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * width * height, data2, GL_DYNAMIC_DRAW);
glUseProgram(computeProgram);
glDispatchCompute(width / 16, height / 16, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_1]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[CMD]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
int i = 0;
do
{
printf("iteration: %d", i);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
cmd[CMD_BUF_RESPONSE] = INITIALIZED;
switch (i % 4)
{
case 0:
cmd[CMD_BUF_CMD] = CMD_THIN_N;
break;
case 1:
cmd[CMD_BUF_CMD] = CMD_THIN_S;
break;
case 2:
cmd[CMD_BUF_CMD] = CMD_THIN_E;
break;
case 3:
cmd[CMD_BUF_CMD] = CMD_THIN_W;
break;
}
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(cmd), cmd, GL_DYNAMIC_DRAW);
glDispatchCompute(width / 16, height / 16, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
if (i % 2 == 0)
{
printf("Input is now img_1. Output is img_0\n");
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffers[IMG_1]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_0]);
checkError(__LINE__);
}
else
{
printf("Input is now img_0. Output is img_1\n");
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffers[IMG_0]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_1]);
checkError(__LINE__);
}
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[CMD]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
printf("cmd issued at start: %d response: %d\n", ptr[CMD_BUF_CMD], ptr[CMD_BUF_RESPONSE]);
i++;
} while(ptr[CMD_BUF_RESPONSE] != INITIALIZED && i < 10);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
cmd[CMD_BUF_CMD] = CMD_REGULARIZE;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[CMD]);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(cmd), cmd, GL_DYNAMIC_DRAW);
glDispatchCompute(width / 16, height / 16, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[CMD]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
printf("Regularize: cmd: %d width: %d height: %d response: %d\n", ptr[CMD_BUF_CMD], ptr[CMD_BUF_WIDTH], ptr[CMD_BUF_HEIGHT], ptr[CMD_BUF_RESPONSE]);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glGenTextures(2, textures);
checkError(__LINE__);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, textures[0]);
checkError(__LINE__);
if (i % 2 == 0)
{
printf("output image is img_1\n");
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_1]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
}
else
{
printf("output image is img_0\n");
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffers[IMG_0]);
ptr = (uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glUseProgram(shaderProgram);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RED, GL_UNSIGNED_INT, ptr);
checkError(__LINE__);
glUniform1i(glGetUniformLocation(shaderProgram, "texData"), 0);
checkError(__LINE__);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
checkError(__LINE__);
}
// Creates the vertex array, the quad geometry and the render program used by
// display(). Leaves the VAO, both buffers and shaderProgram bound/in use.
void initGL()
{
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLuint vbo;
glGenBuffers(1, &vbo);
// Four vertices, each laid out as x, y, u, v (see the attribute pointers below).
GLfloat vertices[] = {
-1.0f, 1.0f, 0.0f, 0.0f,
1.0f, 1.0f, 1.0f, 0.0f,
1.0f, -1.0f, 1.0f, 1.0f,
-1.0f, -1.0f, 0.0f, 1.0f
};
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
GLuint ebo;
glGenBuffers(1, &ebo);
// Two triangles sharing the 0-2 diagonal.
GLuint elements[] = {
0, 1, 2,
2, 3, 0
};
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(elements), elements, GL_STATIC_DRAW);
// NOTE(review): compile and link status are not checked here.
GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertexShader, 1, &vertexSource, NULL);
glCompileShader(vertexShader);
GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragmentShader, 1, &fragmentSource, NULL);
glCompileShader(fragmentShader);
shaderProgram = glCreateProgram();
glAttachShader(shaderProgram, vertexShader);
glAttachShader(shaderProgram, fragmentShader);
glBindFragDataLocation(shaderProgram, 0, "outColor");
glLinkProgram(shaderProgram);
glUseProgram(shaderProgram);
// "position" reads the first two floats of each 4-float vertex.
GLint posAttrib = glGetAttribLocation(shaderProgram, "position");
glEnableVertexAttribArray(posAttrib);
glVertexAttribPointer(posAttrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), 0);
// "texcoord" reads the last two floats (byte offset of two floats).
GLint texAttrib = glGetAttribLocation(shaderProgram, "texcoord");
glEnableVertexAttribArray(texAttrib);
glVertexAttribPointer(texAttrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), (void *)(2 * sizeof(GLfloat)));
checkError(__LINE__);
}
/*
 * Program entry point: loads "test.pgm", widens its pixels to 32-bit words,
 * creates the GLUT window and GL context, runs the GPU skeletonization and
 * enters the GLUT main loop.
 *
 * Returns -1 if the image cannot be loaded, does not have the 640 x 400 size
 * that runComputeProgram() uploads, or GLEW fails to initialise.
 */
int main(int argc, char** argv)
{
    /* runComputeProgram() uploads exactly this many pixels from each array. */
    const unsigned int expectedWidth = 640;
    const unsigned int expectedHeight = 400;

    PGM pgmImage;
    if (pgmImage.ReadFile("test.pgm") != 0)
    {
        fprintf(stderr, "Failed to read test.pgm\n");
        return -1;
    }

    const uint8_t *pgmData = pgmImage.GetData();
    if (pgmData == NULL ||
        pgmImage.GetWidth() != expectedWidth ||
        pgmImage.GetHeight() != expectedHeight)
    {
        fprintf(stderr, "test.pgm must be a binary %u x %u PGM image\n",
                expectedWidth, expectedHeight);
        return -1;
    }

    const unsigned int size = expectedWidth * expectedHeight;
    uint32_t *data = new uint32_t[size];
    uint32_t *data2 = new uint32_t[size](); /* zero-filled scratch image */

    /* Widen each 8-bit sample to one word and count the pure-white pixels. */
    int count = 0;
    for (unsigned int i = 0; i < size; i++)
    {
        data[i] = pgmData[i];
        if (data[i] == 0xFF)
        {
            count++;
        }
    }
    printf("count: %d\n", count);

    glutInitWindowSize(640, 400);
    glutInitWindowPosition (140, 140);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
    glutInit(&argc, argv);
    glutCreateWindow( "OpenGL Application" );
    glutDisplayFunc(display);
    glutReshapeFunc(reshape);
    glewExperimental = true;
    if (glewInit() != GLEW_OK) {
        fprintf(stderr, "Failed to initialize GLEW\n");
        delete[] data;
        delete[] data2;
        return -1;
    }
    initGL();
    runComputeProgram(data, data2);
    checkError(__LINE__);

    /* glBufferData copied the pixels into GL-owned storage during the call. */
    delete[] data;
    delete[] data2;

    glutMainLoop();
    return 0;
}
compute.shader
#version 450 core
// Word offsets inside cmd_buf.
#define WIDTH 0 // Width of image
#define HEIGHT 1 // Height of image
#define CMD 2 // Command to execute
#define RESPONSE 3 // Response to command
#define BUF_LEN 4
// Command codes stored in cmd_buf[CMD]; main() dispatches on them.
#define CMD_UNUSED 1 // TODO: remove this. Will have to be mirroed in C code.
#define CMD_THIN_N 2
#define CMD_THIN_S 3
#define CMD_THIN_E 4
#define CMD_THIN_W 5
#define CMD_NORMALIZE 6
#define CMD_REGULARIZE 7
// Written to cmd_buf[RESPONSE] by removePoint() when a pixel is removed.
#define NOT_FINISHED 1
// One invocation per pixel, in 16 x 16 work groups.
layout (local_size_x = 16, local_size_y = 16) in;
// Binding 0: the four-word command/response block.
layout (std430, binding = 0) buffer Cmd {
uint cmd_buf[BUF_LEN];
};
// Binding 1: image read through getVal().
layout (std430, binding = 1) buffer Img1 {
uint image_0[];
};
// Binding 2: image written by removePoint(), normalize() and regularize().
layout (std430, binding = 2) buffer Img2 {
uint image_1[];
};
// Returns the sum of the eight neighbour entries data[1]..data[8].
// data[0] (the centre pixel) does not take part.
int sigma(uint data[9]) {
    int total = 0;
    for (int k = 8; k >= 1; k--) {
        total += int(data[k]);
    }
    return total;
}
// Crossing-number style measure of the neighbourhood data[1]..data[8].
//
// Two contributions are added:
//   * one for every pair of consecutive odd-indexed neighbours (1-3, 3-5,
//     5-7, 7-1) whose values differ, and
//   * two for every even-indexed neighbour that is greater than both
//     odd-indexed neighbours next to it.
//
// The factor 2 applies to all four even-indexed terms. The earlier
// parenthesisation multiplied only the data[2] term, which made the result
// depend on which of the four positions held the isolated neighbour.
int chi(uint data[9]) {
    int transitions =
        int(data[1] != data[3]) +
        int(data[3] != data[5]) +
        int(data[5] != data[7]) +
        int(data[7] != data[1]);

    int isolated =
        int((data[2] > data[1]) && (data[2] > data[3])) +
        int((data[4] > data[3]) && (data[4] > data[5])) +
        int((data[6] > data[5]) && (data[6] > data[7])) +
        int((data[8] > data[7]) && (data[8] > data[1]));

    return transitions + 2 * isolated;
}
// Converts pixel coordinates (x, y) into a linear index, using the image
// width stored in cmd_buf[WIDTH] as the row length.
int getPos(in int x, int y) {
    int rowStart = y * int(cmd_buf[WIDTH]);
    return rowStart + x;
}
// Reads the pixel at linear index pos from the input image (image_0).
uint getVal(in int pos) {
    uint index = uint(pos);
    return image_0[index];
}
// Decides whether this invocation's pixel is removed and records the result
// in the output image (image_1).
//
// The pixel is removed when chi(neighborhood) == 2 and
// sigma(neighborhood) != 1; in that case 0 is written, cmd_buf[RESPONSE] is
// set to NOT_FINISHED and 1 is returned. Otherwise 1 is written and 0 is
// returned.
int removePoint(uint neighborhood[9]) {
    int pos = getPos(int(gl_GlobalInvocationID.x), int(gl_GlobalInvocationID.y));
    bool removable = (chi(neighborhood) == 2) && (sigma(neighborhood) != 1);

    if (!removable) {
        image_1[pos] = 1;
        return 0;
    }

    image_1[pos] = 0;
    cmd_buf[RESPONSE] = NOT_FINISHED;
    return 1;
}
// Fills neighborhood[1..8] with the eight pixels around this invocation's
// pixel, read from the input image. neighborhood[0] is left untouched.
//
// Index layout, as linear offsets from the centre position:
//   1: -width-1   2: -width   3: -width+1
//   8: -1                     4: +1
//   7: +width-1   6: +width   5: +width+1
//
// Neighbours that would fall outside the image (first/last row, first/last
// column) are reported as 0 and never fetched.
void getNeighborhood(inout uint neighborhood[9]) {
    int width = int(cmd_buf[WIDTH]);
    int pos = getPos(int(gl_GlobalInvocationID.x), int(gl_GlobalInvocationID.y));
    int lastRowStart = int(cmd_buf[WIDTH] * (cmd_buf[HEIGHT] - 1));
    int column = pos % width;

    bool onTop = pos < width;
    bool onLeft = column == 0;
    bool onRight = column == (width - 1);
    bool onBottom = pos >= lastRowStart;

    int offsets[9] = int[9](
        0,
        -1 - width, -width, 1 - width,
        1,
        width + 1, width, width - 1,
        -1);

    bool outside[9] = bool[9](
        false,
        onTop || onLeft, onTop, onTop || onRight,
        onRight,
        onRight || onBottom, onBottom, onBottom || onLeft,
        onLeft);

    for (int k = 1; k < 9; k++) {
        // Only the selected operand of ?: is evaluated, so no out-of-image
        // read happens for border pixels.
        neighborhood[k] = outside[k] ? 0u : getVal(pos + offsets[k]);
    }
}
// Maps this invocation's pixel to 0 (if it was 0) or 1 (anything else) and
// stores that value in both image buffers.
void normalize() {
    int pos = getPos(int(gl_GlobalInvocationID.x), int(gl_GlobalInvocationID.y));
    uint val = 1u;
    if (image_0[pos] == 0x0) {
        val = 0u;
    }
    image_0[pos] = val;
    image_1[pos] = val;
}
// Expands this invocation's pixel from the input image to full range: 0 stays
// 0, everything else becomes 0xFFFFFFFF, written to the output image.
// cmd_buf[RESPONSE] is set to 99 whenever the pixel was 0.
void regularize() {
    int pos = getPos(int(gl_GlobalInvocationID.x), int(gl_GlobalInvocationID.y));
    bool isBackground = (image_0[pos] == 0x0);

    if (isBackground) {
        cmd_buf[RESPONSE] = 99;
        image_1[pos] = 0u;
    } else {
        image_1[pos] = 0xFFFFFFFFu;
    }
}
// One directional thinning step for this invocation's pixel.
//
// The pixel value is first copied from the input image to the output image,
// so every output pixel is defined after the pass. Without that copy, pixels
// that are not removal candidates kept whatever the output buffer held two
// passes earlier, which breaks the scheme as soon as the host swaps the input
// and output buffers between passes.
//
// Only pixels with value 1 are considered. A pixel is a candidate for the
// current command when, along the command's axis, one neighbour is 0 and the
// opposite one is 1 (indices 2/6 for N and S, 4/8 for E and W);
// removePoint() then makes the final decision and overwrites the output
// value.
void skeleton() {
    int pos = getPos(int(gl_GlobalInvocationID.x), int(gl_GlobalInvocationID.y));
    uint neighborhood[9];
    neighborhood[0] = getVal(pos);

    // Carry the current value forward; removePoint() may overwrite it below.
    image_1[pos] = neighborhood[0];

    if (neighborhood[0] != 1) {
        return;
    }
    getNeighborhood(neighborhood);

    bool candidate = false;
    switch (cmd_buf[CMD]) {
    case CMD_THIN_N:
        candidate = (neighborhood[2] == 0 && neighborhood[6] == 1);
        break;
    case CMD_THIN_S:
        candidate = (neighborhood[2] == 1 && neighborhood[6] == 0);
        break;
    case CMD_THIN_E:
        candidate = (neighborhood[4] == 0 && neighborhood[8] == 1);
        break;
    case CMD_THIN_W:
        candidate = (neighborhood[4] == 1 && neighborhood[8] == 0);
        break;
    }

    if (candidate) {
        removePoint(neighborhood);
    }
}
// Shader entry point: runs the pass selected by cmd_buf[CMD]. Unknown command
// codes do nothing.
void main() {
    uint command = cmd_buf[CMD];

    if (command == uint(CMD_NORMALIZE)) {
        normalize();
    } else if (command == uint(CMD_REGULARIZE)) {
        regularize();
    } else if (command == uint(CMD_THIN_N) || command == uint(CMD_THIN_S) ||
               command == uint(CMD_THIN_E) || command == uint(CMD_THIN_W)) {
        skeleton();
    }
}
pgm.cpp
#include "PGM.hpp"
#define PGM_HEADER "P5"
// Creates an empty image: no file buffer, and the remaining fields set to the
// defaults that Clear() assigns.
PGM::PGM()
    : mpData(NULL)
{
    Clear();
}
// Releases the file buffer (if any) through Clear().
PGM::~PGM()
{
Clear();
}
// Returns the pointer to the first pixel byte inside the loaded file buffer,
// or NULL when no image has been parsed. The memory stays owned by the PGM.
uint8_t* PGM::GetData()
{
return mpImgData;
}
// Returns the image width read from the header (0 when nothing is loaded).
uint16_t PGM::GetWidth()
{
return mWidth;
}
// Returns the image height read from the header (0 when nothing is loaded).
uint16_t PGM::GetHeight()
{
return mHeight;
}
// Returns the maximum sample value read from the header (255 by default).
uint8_t PGM::GetMaxWhite()
{
return mMaxWhite;
}
// Releases the file buffer and resets every field to its "no image" default
// (width and height 0, maximum white 255, no pixel pointer).
//
// mpData is reset to NULL after the buffer is freed so that a later Clear()
// (for example from the destructor) or PopulateFields() never touches freed
// memory.
void PGM::Clear()
{
    delete[] mpData; // delete[] on NULL is a no-op
    mpData = NULL;
    mpImgData = NULL;
    mWidth = 0;
    mHeight = 0;
    mMaxWhite = 255;
}
int PGM::PopulateFields(size_t size)
{
int i;
bool EOL = false;
bool haveWhite = false;
bool comment = false;
if (mpData == NULL) { return -1; }
if ((mpData[0] != 0x50) || (mpData[1] != 0x35)) { return -2; }
for (i = 2; i < size; i++)
{
if (mpData[i] == '#')
{
comment = true;
continue;
}
if (mpData[i] == 0x0A && comment == true)
{
comment = false;
break;
}
if (comment == true)
{
continue;
}
}
i++;
sscanf((char *)&mpData[i], "%4" SCNu16 " %4" SCNu16, &mWidth, &mHeight);
for (i; i < size; i++)
{
if (mpData[i] == 0x0A && EOL == false)
{
EOL = true;
continue;
}
if (EOL == true && haveWhite == false)
{
sscanf((char *)&mpData[i], "%3" SCNu8, &mMaxWhite);
haveWhite = true;
}
if (haveWhite == true && mpData[i] == 0x0A)
{
i++;
break;
}
}
if (i == size)
{
return -3;
}
mpImgData = &mpData[i];
return 0;
}
// Loads the file at pPath into memory and parses it as a binary PGM image.
// Any previously loaded image is discarded first.
//
// Returns 0 on success,
//         1 if the file cannot be stat'ed or opened, or is empty,
//         3 if the file cannot be read completely,
//         4 if the header cannot be parsed (see PopulateFields).
// On any failure the object is left empty (GetData() returns NULL).
// Allocation failure is reported by operator new[] throwing, so the former
// return code 2 is no longer produced.
int PGM::ReadFile(const char *pPath)
{
    struct stat st;

    // Drop the previous image. mpData is reset here as well so that no stale
    // pointer survives the release of the old buffer.
    Clear();
    mpData = NULL;

    if (stat(pPath, &st) != 0 || st.st_size <= 0)
    {
        return 1;
    }
    int fd = open(pPath, O_RDONLY);
    if (fd == -1)
    {
        return 1;
    }

    const size_t fileSize = (size_t)st.st_size;
    mpData = new uint8_t[fileSize];

    // read() may return fewer bytes than requested; loop until done.
    size_t total = 0;
    while (total < fileSize)
    {
        ssize_t got = read(fd, mpData + total, fileSize - total);
        if (got == -1 && errno == EINTR)
        {
            continue; // interrupted before any data arrived: retry
        }
        if (got <= 0)
        {
            break; // error or unexpected end of file
        }
        total += (size_t)got;
    }
    close(fd);

    if (total != fileSize)
    {
        Clear();
        mpData = NULL;
        return 3;
    }
    if (PopulateFields(fileSize) != 0)
    {
        Clear();
        mpData = NULL;
        return 4;
    }
    return 0;
}
PGM.hpp
#ifndef __PGM_H__
#define __PGM_H__
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <inttypes.h>
// Holds one binary PGM ("P5") image loaded from disk. The object owns the raw
// file contents; the pixel pointer handed out by GetData() points into that
// buffer.
class PGM
{
public:
// Loads and parses the file at pPath; returns 0 on success, non-zero otherwise.
int ReadFile(const char *pPath);
// Pointer to the first pixel byte (NULL when no image is loaded).
uint8_t* GetData();
// Image dimensions as read from the header.
uint16_t GetWidth();
uint16_t GetHeight();
// Maximum sample value as read from the header.
uint8_t GetMaxWhite();
PGM();
~PGM();
private:
// Frees the file buffer and resets the fields to their defaults.
void Clear();
// Parses the header in mpData (size bytes); returns 0 on success, negative on error.
int PopulateFields(size_t size);
// Entire file contents, allocated with new[].
uint8_t *mpData;
// Start of the pixel data inside mpData.
uint8_t *mpImgData;
uint16_t mWidth;
uint16_t mHeight;
uint8_t mMaxWhite;
};
#endif