Rename many parameters, cleanup, simplify checks (#8)
nietras authored May 12, 2024
1 parent c6dddd7 commit 153eb85
Showing 3 changed files with 256 additions and 275 deletions.
121 changes: 40 additions & 81 deletions src/Llm/Gpt2.Test.cs
@@ -4,6 +4,8 @@

namespace nietras.LargeLanguageModel;

+#pragma warning disable IDE0007 // Use implicit type

internal static partial class Gpt2
{
public static unsafe void Test(string dataDirectory)
@@ -48,8 +50,22 @@ public static unsafe void Test(string dataDirectory)
state_file.ReadExactlyUnmanaged(expected_grads_memory, model.num_parameters);
state_file.Dispose();

+// expected losses are as follows, from Python
+float[] expected_losses = {
+5.270007133483887f,
+4.059706687927246f,
+3.3751230239868164f,
+2.8007826805114746f,
+2.315382242202759f,
+1.8490285873413086f,
+1.3946564197540283f,
+0.9991465210914612f,
+0.6240804195404053f,
+0.37651097774505615f
+};

// overall OK signal for the test
-bool allok = true;
+bool allOk = true;

// let's do 10 training iterations, following the pytorch code
float* losses = stackalloc float[10];
@@ -69,34 +85,7 @@ public static unsafe void Test(string dataDirectory)
// error checking at step 0 for reference activations/gradients

// at this point, target should be equal to expected_logits, let's compare
-bool logits_ok = true;
-for (int i = 0; i < B * T * V; i++)
-{
-if (i < 3)
-{
-Log($"{expected_logits[i]} {model.acts.logits[i]}");
-}
-if (MathF.Abs(expected_logits[i] - model.acts.logits[i]) >= 1e-2)
-{
-Log($"MISMATCH AT INDEX {i}: {expected_logits[i]} {model.acts.logits[i]}");
-logits_ok = false;
-break;
-}
-}
-if (!logits_ok) { Log("NOT "); }
-Log("OK (LOGITS)");
-allok = allok && logits_ok;
-
-// compare the achieved loss
-if (MathF.Abs(model.mean_loss - *expected_loss) >= 1e-2)
-{
-Log($"LOSS MISMATCH: {model.mean_loss} {*expected_loss}");
-allok = false;
-}
-else
-{
-Log($"LOSS OK: {model.mean_loss} {*expected_loss}");
-}
+allOk &= check_tensor(expected_logits, model.acts.logits, B * T * V, "Logits");

// finally check all the gradients
var gradoks = new bool[16];
@@ -119,45 +108,22 @@
gradoks[15] = check_tensor(grads.lnfb, expected_grads.lnfb, C, "dlnfb");
for (int i = 0; i < 16; i++)
{
-allok = allok && gradoks[i];
+allOk = allOk && gradoks[i];
}
}

Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);

-// print the timing information at the end
-Log($"step {step}: loss {model.mean_loss} (took {time_elapsed_s * 1000} ms)");
losses[step] = model.mean_loss;
+var expectedLoss = expected_losses[step];
+var lossOk = CheckLoss(model.mean_loss, expectedLoss);
+allOk = allOk && lossOk;
+// print the timing information at the end
+Log($"step {step}: loss {model.mean_loss:F6} expected loss {expectedLoss:F6} " +
+$"{(lossOk ? "OK" : "FAIL"),-4} (took {time_elapsed_s * 1000:F0} ms)");
}

-// expected losses are as follows, from Python
-float[] expected_losses = {
-5.270007133483887f,
-4.059706687927246f,
-3.3751230239868164f,
-2.8007826805114746f,
-2.315382242202759f,
-1.8490285873413086f,
-1.3946564197540283f,
-0.9991465210914612f,
-0.6240804195404053f,
-0.37651097774505615f
-};
-// compare
-for (int i = 0; i < 10; i++)
-{
-if (MathF.Abs(losses[i] - expected_losses[i]) >= 1e-2)
-{
-Log($"LOSS MISMATCH AT STEP {i}: {losses[i]} {expected_losses[i]}");
-allok = false;
-}
-else
-{
-Log($"loss ok at step {i}: {losses[i]} {expected_losses[i]}");
-}
-}

Log($"overall okay: {allok}");
Log($"overall okay: {allOk}");

// free everything
free(x);
@@ -168,35 +134,28 @@ public static unsafe void Test(string dataDirectory)
Free(&model);
}

+static bool CheckLoss(float a, float b) => Check(a, b);
+static bool Check(float a, float b) => MathF.Abs(a - b) < 0.01f;

// poor man's tensor checker
-static unsafe bool check_tensor(float* a, float* b, int n, string label)
+static unsafe bool check_tensor(float* actual, float* expected, int n, string label)
{
-int print_upto = 5;
+const int printUpTo = 0;//5;
+LogNoNewLine($"{label,-16} ");
bool ok = true;
Log($"{label}");

for (int i = 0; i < n; i++)
{
-if (MathF.Abs(a[i] - b[i]) <= 1e-2)
+var a = actual[i];
+var e = expected[i];
+var isOk = Check(a, e);
+ok &= isOk;
+if (i < printUpTo)
{
if (i < print_upto) { Log("OK "); }
Log("");
LogNoNewLine($"{(isOk ? "OK " : "FAIL")} {a,15} {e,15}");
}
-else
-{
-if (i < print_upto) { Log("NOT OK "); }
-ok = false;
-}
-if (i < print_upto) { Log($"{a[i]} {b[i]}"); }
}
-// print the final result
-if (ok)
-{
-Log("TENSOR OK");
-}
-else
-{
-Log("TENSOR NOT OK");
-}
+Log($"TENSOR {(ok ? "OK " : "FAIL")}");
return ok;
}
}
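For reference, the simplified checking pattern above reads as follows in standalone C#. This is a minimal sketch using safe spans instead of raw pointers; the 0.01f absolute tolerance matches Check in the diff, while the class name, the CheckTensor signature, and the sample values are illustrative assumptions, not code from the commit.

using System;

static class TensorCheckSketch
{
    // Absolute-tolerance scalar comparison; the 0.01f matches Check in the diff above.
    static bool Check(float a, float b) => MathF.Abs(a - b) < 0.01f;

    // Element-wise comparison over spans; logs the first printUpTo entries, then a summary.
    static bool CheckTensor(ReadOnlySpan<float> actual, ReadOnlySpan<float> expected,
        string label, int printUpTo = 5)
    {
        Console.Write($"{label,-16} ");
        bool ok = true;
        for (int i = 0; i < actual.Length; i++)
        {
            bool isOk = Check(actual[i], expected[i]);
            ok &= isOk;
            if (i < printUpTo) { Console.Write($"{(isOk ? "OK" : "FAIL")} {actual[i],15} {expected[i],15} "); }
        }
        Console.WriteLine($"TENSOR {(ok ? "OK" : "FAIL")}");
        return ok;
    }

    static void Main()
    {
        float[] expected = { 1.0f, 2.0f, 3.0f };
        float[] actual = { 1.001f, 2.005f, 3.1f }; // last element exceeds the 0.01f tolerance
        CheckTensor(actual, expected, "demo");     // prints FAIL on that entry and TENSOR FAIL
    }
}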
5 changes: 4 additions & 1 deletion src/Llm/Gpt2.cs
@@ -6,9 +6,12 @@

namespace nietras.LargeLanguageModel;

+#pragma warning disable IDE0007 // Use implicit type

internal static partial class Gpt2
{
static readonly Action<string> Log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
+static readonly Action<string> LogNoNewLine = t => { Console.Write(t); Trace.Write(t); };

// ----------------------------------------------------------------------------
// GPT-2 model definition
@@ -110,7 +113,7 @@ public unsafe struct ActivationTensors
}
float* acts_memory = malloc<float>(num_activations);
float**[] ptrs = [
&acts->encoded,
&acts->encoded,
&acts->ln1,
&acts->ln1_mean,
&acts->ln1_rstd,
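The Log and LogNoNewLine delegates added in Gpt2.cs fan each message out to both Console and Trace, so one call reaches stdout and any attached trace listener. A minimal standalone sketch of that pattern follows; the class name and messages are illustrative.

using System;
using System.Diagnostics;

static class LogSketch
{
    // One delegate, two sinks: Console for interactive runs, Trace for attached listeners.
    static readonly Action<string> Log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
    static readonly Action<string> LogNoNewLine = t => { Console.Write(t); Trace.Write(t); };

    static void Main()
    {
        LogNoNewLine("step 0: ");
        Log("loss 5.270007 OK");
    }
}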
