Rename many parameters, cleanup, simplify checks (#8)
nietras authored May 12, 2024
1 parent c6dddd7 commit 153eb85
Showing 3 changed files with 256 additions and 275 deletions.
121 changes: 40 additions & 81 deletions src/Llm/Gpt2.Test.cs
@@ -4,6 +4,8 @@

namespace nietras.LargeLanguageModel;

+#pragma warning disable IDE0007 // Use implicit type

internal static partial class Gpt2
{
public static unsafe void Test(string dataDirectory)
@@ -48,8 +50,22 @@ public static unsafe void Test(string dataDirectory)
state_file.ReadExactlyUnmanaged(expected_grads_memory, model.num_parameters);
state_file.Dispose();

+// expected losses are as follows, from Python
+float[] expected_losses = {
+5.270007133483887f,
+4.059706687927246f,
+3.3751230239868164f,
+2.8007826805114746f,
+2.315382242202759f,
+1.8490285873413086f,
+1.3946564197540283f,
+0.9991465210914612f,
+0.6240804195404053f,
+0.37651097774505615f
+};

// overall OK signal for the test
-bool allok = true;
+bool allOk = true;

// let's do 10 training iterations, following the pytorch code
float* losses = stackalloc float[10];
@@ -69,34 +85,7 @@ public static unsafe void Test(string dataDirectory)
// error checking at step 0 for reference activations/gradients

// at this point, target should be equal to expected_logits, let's compare
-bool logits_ok = true;
-for (int i = 0; i < B * T * V; i++)
-{
-if (i < 3)
-{
-Log($"{expected_logits[i]} {model.acts.logits[i]}");
-}
-if (MathF.Abs(expected_logits[i] - model.acts.logits[i]) >= 1e-2)
-{
-Log($"MISMATCH AT INDEX {i}: {expected_logits[i]} {model.acts.logits[i]}");
-logits_ok = false;
-break;
-}
-}
-if (!logits_ok) { Log("NOT "); }
-Log("OK (LOGITS)");
-allok = allok && logits_ok;
-
-// compare the achieved loss
-if (MathF.Abs(model.mean_loss - *expected_loss) >= 1e-2)
-{
-Log($"LOSS MISMATCH: {model.mean_loss} {*expected_loss}");
-allok = false;
-}
-else
-{
-Log($"LOSS OK: {model.mean_loss} {*expected_loss}");
-}
+allOk &= check_tensor(expected_logits, model.acts.logits, B * T * V, "Logits");

// finally check all the gradients
var gradoks = new bool[16];
@@ -119,45 +108,22 @@
gradoks[15] = check_tensor(grads.lnfb, expected_grads.lnfb, C, "dlnfb");
for (int i = 0; i < 16; i++)
{
-allok = allok && gradoks[i];
+allOk = allOk && gradoks[i];
}
}

Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);

-// print the timing information at the end
-Log($"step {step}: loss {model.mean_loss} (took {time_elapsed_s * 1000} ms)");
losses[step] = model.mean_loss;
+var expectedLoss = expected_losses[step];
+var lossOk = CheckLoss(model.mean_loss, expectedLoss);
+allOk = allOk && lossOk;
+// print the timing information at the end
+Log($"step {step}: loss {model.mean_loss:F6} expected loss {expectedLoss:F6} " +
+$"{(lossOk ? "OK" : "FAIL"),-4} (took {time_elapsed_s * 1000:F0} ms)");
}

-// expected losses are as follows, from Python
-float[] expected_losses = {
-5.270007133483887f,
-4.059706687927246f,
-3.3751230239868164f,
-2.8007826805114746f,
-2.315382242202759f,
-1.8490285873413086f,
-1.3946564197540283f,
-0.9991465210914612f,
-0.6240804195404053f,
-0.37651097774505615f
-};
-// compare
-for (int i = 0; i < 10; i++)
-{
-if (MathF.Abs(losses[i] - expected_losses[i]) >= 1e-2)
-{
-Log($"LOSS MISMATCH AT STEP {i}: {losses[i]} {expected_losses[i]}");
-allok = false;
-}
-else
-{
-Log($"loss ok at step {i}: {losses[i]} {expected_losses[i]}");
-}
-}

Log($"overall okay: {allok}");
Log($"overall okay: {allOk}");

// free everything
free(x);
@@ -168,35 +134,28 @@ public static unsafe void Test(string dataDirectory)
Free(&model);
}

+static bool CheckLoss(float a, float b) => Check(a, b);
+static bool Check(float a, float b) => MathF.Abs(a - b) < 0.01f;

// poor man's tensor checker
-static unsafe bool check_tensor(float* a, float* b, int n, string label)
+static unsafe bool check_tensor(float* actual, float* expected, int n, string label)
{
-int print_upto = 5;
+const int printUpTo = 0;//5;
+LogNoNewLine($"{label,-16} ");
bool ok = true;
Log($"{label}");

for (int i = 0; i < n; i++)
{
-if (MathF.Abs(a[i] - b[i]) <= 1e-2)
+var a = actual[i];
+var e = expected[i];
+var isOk = Check(a, e);
+ok &= isOk;
+if (i < printUpTo)
{
if (i < print_upto) { Log("OK "); }
Log("");
LogNoNewLine($"{(isOk ? "OK " : "FAIL")} {a,15} {e,15}");
}
-else
-{
-if (i < print_upto) { Log("NOT OK "); }
-ok = false;
-}
-if (i < print_upto) { Log($"{a[i]} {b[i]}"); }
}
-// print the final result
-if (ok)
-{
-Log("TENSOR OK");
-}
-else
-{
-Log("TENSOR NOT OK");
-}
+Log($"TENSOR {(ok ? "OK " : "FAIL")}");
return ok;
}
}
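For reference, the simplified checking pattern above reads as follows in standalone C#. This is a minimal sketch using safe spans instead of raw pointers; the 0.01f absolute tolerance matches Check in the diff, while the class name, the CheckTensor signature, and the sample values are illustrative assumptions, not code from the commit.

using System;

static class TensorCheckSketch
{
    // Absolute-tolerance scalar comparison; the 0.01f matches Check in the diff above.
    static bool Check(float a, float b) => MathF.Abs(a - b) < 0.01f;

    // Element-wise comparison over spans; logs the first printUpTo entries, then a summary.
    static bool CheckTensor(ReadOnlySpan<float> actual, ReadOnlySpan<float> expected,
        string label, int printUpTo = 5)
    {
        Console.Write($"{label,-16} ");
        bool ok = true;
        for (int i = 0; i < actual.Length; i++)
        {
            bool isOk = Check(actual[i], expected[i]);
            ok &= isOk;
            if (i < printUpTo) { Console.Write($"{(isOk ? "OK" : "FAIL")} {actual[i],15} {expected[i],15} "); }
        }
        Console.WriteLine($"TENSOR {(ok ? "OK" : "FAIL")}");
        return ok;
    }

    static void Main()
    {
        float[] expected = { 1.0f, 2.0f, 3.0f };
        float[] actual = { 1.001f, 2.005f, 3.1f }; // last element exceeds the 0.01f tolerance
        CheckTensor(actual, expected, "demo");     // prints FAIL on that entry and TENSOR FAIL
    }
}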
5 changes: 4 additions & 1 deletion src/Llm/Gpt2.cs
@@ -6,9 +6,12 @@

namespace nietras.LargeLanguageModel;

+#pragma warning disable IDE0007 // Use implicit type

internal static partial class Gpt2
{
static readonly Action<string> Log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
+static readonly Action<string> LogNoNewLine = t => { Console.Write(t); Trace.Write(t); };

// ----------------------------------------------------------------------------
// GPT-2 model definition
@@ -110,7 +113,7 @@ public unsafe struct ActivationTensors
}
float* acts_memory = malloc<float>(num_activations);
float**[] ptrs = [
&acts->encoded,
&acts->encoded,
&acts->ln1,
&acts->ln1_mean,
&acts->ln1_rstd,
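The Log and LogNoNewLine delegates added in Gpt2.cs fan each message out to both Console and Trace, so one call reaches stdout and any attached trace listener. A minimal standalone sketch of that pattern follows; the class name and messages are illustrative.

using System;
using System.Diagnostics;

static class LogSketch
{
    // One delegate, two sinks: Console for interactive runs, Trace for attached listeners.
    static readonly Action<string> Log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
    static readonly Action<string> LogNoNewLine = t => { Console.Write(t); Trace.Write(t); };

    static void Main()
    {
        LogNoNewLine("step 0: ");
        Log("loss 5.270007 OK");
    }
}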
