I wrote a code sample of a neural network in C++ that utilises only the standard <vector> and <cmath> headers plus OpenCV's core module. This code has the following abilities:
1. Specify any number of inputs/outputs
2. Add as many hidden layers as you like
3. Pick Sigmoid or ReLU activations
4. Do forward passes and simple SGD training via back-propagation
// NeuralNetOCV.hpp
#pragma once
#include <vector>
#include <cmath>
#include <opencv2/core.hpp>

class NeuralNetOCV {
public:
    // ctor: specify #inputs and #outputs
    NeuralNetOCV(int inputSize, int outputSize)
    {
        layers_.push_back(inputSize);
        layers_.push_back(outputSize);
    }

    // call before initialize(): insert a hidden layer of given width
    void addHiddenLayer(int width)
    {
        layers_.insert(layers_.end() - 1, width);
    }

    // init RNG (seed) + choose activation (0=sigmoid, 1=ReLU)
    void initialize(int seed = 0, int activation = 0)
    {
        activation_ = activation ? RELU : SIGMOID;
        rng_ = cv::RNG(seed);
        int L = (int)layers_.size();
        weights_.resize(L - 1);
        biases_ .resize(L - 1);
        for (int l = 0; l < L - 1; ++l) {
            int inN  = layers_[l];
            int outN = layers_[l+1];
            weights_[l].assign(outN, std::vector<double>(inN));
            biases_ [l].assign(outN, 0.0);
            for (int i = 0; i < outN; ++i) {
                // uniform in [-1,1]
                biases_[l][i] = rng_.uniform(-1.0, 1.0);
                for (int j = 0; j < inN; ++j)
                    weights_[l][i][j] = rng_.uniform(-1.0, 1.0);
            }
        }
    }

    // standard feed-forward
    std::vector<double> feedForward(const std::vector<double>& x)
    {
        std::vector<double> a = x;
        int L = (int)weights_.size();
        for (int l = 0; l < L; ++l) {
            int outN = layers_[l+1];
            std::vector<double> nextA(outN);
            for (int i = 0; i < outN; ++i) {
                double z = biases_[l][i];
                for (int j = 0; j < layers_[l]; ++j)
                    z += weights_[l][i][j] * a[j];
                nextA[i] = activate(z);
            }
            a = std::move(nextA);
        }
        return a;
    }

    // simple SGD on MSE loss (one sample at a time)
    void train(const std::vector<std::vector<double>>& X,
               const std::vector<std::vector<double>>& Y,
               int epochs, double lr)
    {
        int N = (int)X.size();
        for (int e = 0; e < epochs; ++e)
            for (int i = 0; i < N; ++i)
                backprop(X[i], Y[i], lr);
    }

private:
    enum Act { SIGMOID = 0, RELU = 1 };
    int activation_ = SIGMOID;
    cv::RNG rng_;
    std::vector<int> layers_;
    // weights_[l][i][j] = weight from layer-l neuron-j to layer-(l+1) neuron-i
    std::vector<std::vector<std::vector<double>>> weights_;
    // biases_[l][i] = bias for layer-(l+1) neuron-i
    std::vector<std::vector<double>> biases_;

    // activation and its derivative (by output value y)
    double activate(double x)
    {
        if (activation_ == SIGMOID)
            return 1.0 / (1.0 + std::exp(-x));
        else // RELU
            return x > 0.0 ? x : 0.0;
    }
    double activateDeriv(double y)
    {
        if (activation_ == SIGMOID)
            return y * (1.0 - y);
        else // RELU
            return y > 0.0 ? 1.0 : 0.0;
    }

    // backprop one (x,y) pair
    void backprop(const std::vector<double>& x,
                  const std::vector<double>& y,
                  double lr)
    {
        int L = (int)layers_.size();
        // store activations & pre-activations
        std::vector<std::vector<double>> a(L), z(L-1);
        a[0] = x;
        for (int l = 0; l < L-1; ++l) {
            int outN = layers_[l+1];
            z[l].resize(outN);
            a[l+1].resize(outN);
            for (int i = 0; i < outN; ++i) {
                double sum = biases_[l][i];
                for (int j = 0; j < layers_[l]; ++j)
                    sum += weights_[l][i][j] * a[l][j];
                z[l][i] = sum;
                a[l+1][i] = activate(sum);
            }
        }
        // δ at output
        std::vector<std::vector<double>> delta(L-1);
        int outN = layers_.back();
        delta[L-2].resize(outN);
        for (int i = 0; i < outN; ++i) {
            double diff = a.back()[i] - y[i];
            delta[L-2][i] = diff * activateDeriv(a.back()[i]);
        }
        // backprop hidden δ's
        for (int l = L-3; l >= 0; --l) {
            int n     = layers_[l+1],
                nNext = layers_[l+2];
            delta[l].resize(n);
            for (int i = 0; i < n; ++i) {
                double sum = 0;
                for (int j = 0; j < nNext; ++j)
                    sum += weights_[l+1][j][i] * delta[l+1][j];
                delta[l][i] = sum * activateDeriv(a[l+1][i]);
            }
        }
        // gradient step
        for (int l = 0; l < L-1; ++l) {
            int outN = layers_[l+1];
            for (int i = 0; i < outN; ++i) {
                biases_[l][i] -= lr * delta[l][i];
                for (int j = 0; j < layers_[l]; ++j)
                    weights_[l][i][j] -= lr * delta[l][i] * a[l][j];
            }
        }
    }
};
The class can then be used from main(). A minimal main() that trains the network on XOR might look like this:
#include "NeuralNetOCV.hpp"
#include <iostream>

int main() {
    NeuralNetOCV nn(2, 1);
    nn.addHiddenLayer(4);
    nn.initialize(123, 0);   // seed=123, sigmoid
    // XOR
    std::vector<std::vector<double>> X = {{0,0},{0,1},{1,0},{1,1}};
    std::vector<std::vector<double>> Y = {{0},{1},{1},{0}};
    nn.train(X, Y, 5000, 0.5);
    for (auto &x : X) {
        auto out = nn.feedForward(x);
        std::cout << x[0] << "," << x[1]
                  << " -> " << out[0] << "\n";
    }
}
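The same API handles deeper networks and the ReLU option from the feature list. Here is a minimal sketch, assuming the same header; the widths, the seed, and the name deep are illustrative, not tuned values:

NeuralNetOCV deep(3, 2);    // 3 inputs, 2 outputs
deep.addHiddenLayer(16);    // hidden layers are inserted just before the output layer,
deep.addHiddenLayer(8);     // so layers_ becomes {3, 16, 8, 2}
deep.initialize(42, 1);     // seed=42, activation=1 selects ReLU
// deep.train(...) and deep.feedForward(...) are then used exactly as in the XOR example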
Here is a detailed description of what NeuralNetOCV is doing, piece by piece:
1. Core Data Structures
- layers_: a vector of ints, e.g. [2, 4, 1] means 2 inputs → 4 hidden → 1 output.
- weights_ & biases_: nested std::vectors:
- weights_[ℓ][i][j] is the weight from neuron j in layer ℓ to neuron i in layer ℓ+1.
- biases_[ℓ][i] is the bias for neuron i in layer ℓ+1.
- cv::RNG rng_: seeded once and used to draw uniform [-1, 1] values for the initial weights and biases, so no <random> header is needed.
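To make that indexing concrete, here is what the containers hold for the XOR network built in main() above; this is only a restatement of the shapes implied by the code, not extra API:

// after NeuralNetOCV nn(2, 1); nn.addHiddenLayer(4); nn.initialize();
// layers_     = {2, 4, 1}
// weights_[0] : 4 x 2 matrix (input  -> hidden); weights_[0][i][j] = weight from input j to hidden neuron i
// weights_[1] : 1 x 4 matrix (hidden -> output)
// biases_[0]  : 4 values (hidden layer); biases_[1] : 1 value (output layer)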
2. Public API
· Constructor
NeuralNetOCV(int inputSize, int outputSize);
Builds layers_ = {inputSize, outputSize}; you must add hidden layers before calling initialize().
· addHiddenLayer(width): Inserts a hidden layer just before the output layer.
· initialize(seed, activationMode):
a) Seeds rng_ via cv::RNG(seed).
b) Fills the network's weight/bias matrices with random uniform [-1, 1] values.
c) Selects SIGMOID or RELU via a simple int flag (0/1).
· feedForward(x): Takes x (size == layers_[0]), then for each layer ℓ:
a) Compute zᵢ = biasᵢ + Σⱼ weightᵢⱼ * aⱼ
b) Apply the activation: aᵢ = σ(zᵢ) or ReLU(zᵢ)
and return the final output vector.
· train(X, Y, epochs, lr): Repeats simple online SGD on the MSE loss: backprop(xᵏ, yᵏ, lr) for every sample, every epoch (a small loss-monitoring helper is sketched just after this list).
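train() itself does not report the loss it is minimising. A minimal free-standing helper, assuming the header above (the name meanSquaredError is hypothetical and not part of the class), can be used to watch the MSE fall between calls to train():

// hypothetical helper: average squared error of the network over a dataset
double meanSquaredError(NeuralNetOCV& nn,
                        const std::vector<std::vector<double>>& X,
                        const std::vector<std::vector<double>>& Y)
{
    double total = 0.0;
    for (size_t k = 0; k < X.size(); ++k) {
        std::vector<double> out = nn.feedForward(X[k]);
        for (size_t i = 0; i < out.size(); ++i) {
            double d = out[i] - Y[k][i];
            total += d * d;
        }
    }
    return total / X.size();   // mean over samples
}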
3. Back-Propagation Details
· Storage
a) a[ℓ] holds activations at layer ℓ.
b) z[ℓ] holds pre-activations (the “z” values) at layer ℓ+1.
· Error at output: δᴸ = (aᴸ − y) ⊙ σ′(aᴸ)
· Propagating back: δˡ = ((Wˡ⁺¹)ᵀ · δˡ⁺¹) ⊙ σ′(aˡ)
· Gradient step: Wˡ ← Wˡ − lr · (δˡ ⊗ aˡ⁻¹), bˡ ← bˡ − lr · δˡ
(⊙ is the element-wise product, ⊗ the outer product; superscript L marks the output layer.)
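As a concrete check of these three formulas, here is one hand-worked SGD step for a toy 1-input, 1-output sigmoid network; the starting values (w = 0.5, b = 0, x = 1, y = 1, lr = 0.5) are purely illustrative and the numbers are rounded:

z = w·x + b = 0.5·1 + 0 = 0.5
a = σ(0.5) ≈ 0.6225
δ = (a − y) · a·(1 − a) ≈ (−0.3775)·(0.2350) ≈ −0.0887
w ← w − lr·δ·x ≈ 0.5 − 0.5·(−0.0887)·1 ≈ 0.5444
b ← b − lr·δ ≈ 0 − 0.5·(−0.0887) ≈ 0.0444

Both parameters move so that the next forward pass pushes a closer to the target y = 1.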
4. Why This Design?
· Minimal Dependencies: only <vector>, <cmath>, plus OpenCV core.
· Clarity: everything’s in plain vectors—no funky templates or hidden meta-programming.
· Flexibility: add N hidden layers, choose activation, swap in other losses or optimisers.
5. Possible Extensions:
Matrix Acceleration: Convert weights/biases to cv::Mat and use cv::gemm() to speed up your forward/backward passes. On CUDA builds, drop in cv::cuda::GpuMat + cv::cuda::gemm(). A sketch of the forward pass in this style follows after this list.
Other Losses & Optimisers:
o Cross-entropy: replace the MSE backprop for classification tasks (see the second sketch after this list).
o Momentum or Adam: keep velocity terms per weight and adjust the updates accordingly.
o Mini-batch training: accumulate gradients over a batch before applying them.
o Regularization & Dropout: L1/L2 penalties on weights, or randomly zero out activations during training for robustness.
Layer Types: Throw in convolutional or recurrent layers by abstracting the “dense” layer logic into virtual base classes.
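A rough sketch of the matrix-acceleration idea from the first bullet above: one dense-layer forward pass done with cv::Mat and cv::gemm(). The function name denseForwardSigmoid and the chosen layout (W as outN×inN, a as inN×1, b as outN×1, all CV_64F) are assumptions for illustration, not existing NeuralNetOCV code:

#include <opencv2/core.hpp>

// sketch only: z = W*a + b in a single GEMM call, then an element-wise sigmoid
cv::Mat denseForwardSigmoid(const cv::Mat& W, const cv::Mat& a, const cv::Mat& b)
{
    cv::Mat z;
    cv::gemm(W, a, 1.0, b, 1.0, z);   // z = 1.0*W*a + 1.0*b
    cv::Mat negZ = -z, e;
    cv::exp(negZ, e);                 // e = exp(-z), element-wise
    return 1.0 / (1.0 + e);           // sigmoid(z) as a cv::Mat
}

And a sketch of the cross-entropy swap from the second bullet: with sigmoid outputs and a cross-entropy loss, the σ′(a) factor cancels out of the output error, so only the output-delta loop inside backprop() would change. The loop below is a hypothetical replacement, not part of the class above; the rest of the method stays as written:

// replaces the "δ at output" loop in backprop() for sigmoid + cross-entropy
for (int i = 0; i < outN; ++i)
    delta[L-2][i] = a.back()[i] - y[i];   // (a − y), no activateDeriv() factor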
