Commit 9b4c013

Add Gpt2.Infer with Prompt example incl. tokenization encode/decode (#22)
nietras authored Jul 24, 2024
1 parent 755fa13 commit 9b4c013
Showing 5 changed files with 80 additions and 15 deletions.
63 changes: 63 additions & 0 deletions src/Llm/Gpt2.Infer.cs
@@ -0,0 +1,63 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace nietras.LargeLanguageModel;

static partial class Gpt2
{
    public static unsafe void Infer(string dataDirectory, ILlm llmToUse,
        Action<string>? log)
    {
        // build the GPT-2 model from a checkpoint
        using var model = ModelFromCheckpoint(dataDirectory + ModelBinaryFileName);
        // Arbitrarily limit max tokens here for now
        var maxTokenCount = Math.Min(256, model.Config.MaxTokenCount);

        var tokenizer = Bpe.ReadGpt2FromTiktokenFile(dataDirectory + TokenizerTiktokenFileName);

        var stopwatch = new Stopwatch();
        var llm = CreateTimeLlm(llmToUse);
        var promptTokenIndices = new List<int>();

        int* tokenIndices = stackalloc int[maxTokenCount];
        var tokenIndicesSpan = new Span<int>(tokenIndices, maxTokenCount);
        int tokenCount = 0;

        // some memory for generating samples from the model
        ulong randomNumberState = 1337;

        while (true)
        {
            LogNoNewLine("Prompt: ");
            var line = Console.ReadLine();
            if (line == null) { continue; }

            promptTokenIndices.Clear();
            tokenizer.Encode(line, promptTokenIndices);

            var tokensToCopy = Math.Min(maxTokenCount - 1, promptTokenIndices.Count);
            CollectionsMarshal.AsSpan(promptTokenIndices).Slice(0, tokensToCopy).CopyTo(tokenIndicesSpan);
            tokenIndicesSpan[tokensToCopy] = EndOfTextTokenIndex;
            tokenCount = tokensToCopy + 1;

            Log($"Prompt (encode-decode): {tokenizer.TryDecode(tokenIndicesSpan.Slice(0, tokenCount))}");
            while (tokenCount < maxTokenCount)
            {
                // note that inference is wasteful here because for each t,
                // we re-compute all activations between 0 and t
                Forward(model, tokenIndices, null, 1, tokenCount, llm, maxTokenCount);

                float* probabilities = model.Outputs!.Probabilities.Ptr + (tokenCount - 1) * model.Config.VocabularySize;
                float coin = RandomSingle(&randomNumberState);
                int nextToken = FindSampleIndex(probabilities, model.Config.VocabularySize, coin);
                tokenIndices[tokenCount] = nextToken;
                ++tokenCount;
                var output = tokenizer.TryDecode([nextToken]) ?? string.Empty;
                LogNoNewLine(output);
            }
            Log(string.Empty);
        }
    }
}
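In the loop above, RandomSingle advances randomNumberState to produce a uniform value in [0, 1), and FindSampleIndex turns that value into a token index from the vocabulary's probability distribution. Both helpers are defined elsewhere in the repository; the snippet below is only a sketch of the usual cumulative-probability sampling they imply, not the repository's implementation.

// Sketch only: maps a uniform "coin" in [0, 1) onto a categorical distribution,
// such as the VocabularySize probabilities Infer reads for the last position.
static int SampleTokenIndex(ReadOnlySpan<float> probabilities, float coin)
{
    float cumulative = 0f;
    for (int i = 0; i < probabilities.Length; i++)
    {
        cumulative += probabilities[i];
        if (coin < cumulative) { return i; }
    }
    // Floating-point rounding can leave the sum slightly below 1; fall back to the last index.
    return probabilities.Length - 1;
}

The index returned this way is what Infer appends to tokenIndices and immediately decodes back to text with tokenizer.TryDecode.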
4 changes: 1 addition & 3 deletions src/Llm/Gpt2.Train.cs
@@ -12,7 +12,6 @@ static partial class Gpt2
     internal const string ModelDebugBinaryFileName = "gpt2_124M_debug_state.bin";
 
     internal const string TokenizerTiktokenFileName = "gpt2.tiktoken";
-    internal const string TokenizerBinaryFileName = "gpt2_tokenizer.bin";
 
     internal const string DataTinyStoriesTrainBinaryFileName = "TinyStories_train.bin";
     internal const string DataTinyStoriesValidationBinaryFileName = "TinyStories_val.bin";
@@ -23,7 +22,6 @@ static partial class Gpt2
     internal static readonly IReadOnlyList<string> FileNames = [
         ModelBinaryFileName,
         ModelDebugBinaryFileName,
-        TokenizerBinaryFileName,
         //DataTinyStoriesTrainBinaryFileName,
         //DataTinyStoriesValidationBinaryFileName,
         TinyShakespeareTrainBinaryFileName,
@@ -49,7 +47,7 @@ public static unsafe void Train(string dataDirectory, ILlm llmToUse)
         var trainTokens = File.Exists(tinyShakespeareTrain) ? tinyShakespeareTrain : tinyStoriesTrain;
         var valTokens = File.Exists(tinyShakespeareValidation) ? tinyShakespeareValidation : tinyStoriesValidation;
         int b = 4; // batch size 4 (i.e. 4 independent token sequences will be trained on)
-        int t = 64; // sequence length 64 (i.e. each sequence is 64 tokens nint). must be <= maxT, which is 1024 for GPT-2
+        int t = 64; // sequence length 64 (i.e. each sequence is 64 tokens int). must be <= maxT, which is 1024 for GPT-2
         using DataLoader trainLoader = new(trainTokens, b, t);
         Log($"Train dataset BatchCount: {trainLoader.BatchCount}");

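For scale, these training settings mean each step consumes b × t = 4 × 64 = 256 tokens, comfortably inside the 1024-token GPT-2 context. A minimal sanity check, using only the values visible in the diff:

// Sketch only: values taken from the comments in Gpt2.Train above.
int b = 4, t = 64, maxT = 1024;
int tokensPerStep = b * t;      // 256 tokens per forward/backward pass
bool withinContext = t <= maxT; // true: sequence length stays under the GPT-2 limit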
2 changes: 1 addition & 1 deletion src/Llm/Gpt2.Test.cs → src/Llm/Gpt2.VerifyTrain.cs
@@ -30,7 +30,7 @@ public static ExpectedOutputTensors Create(int batchSize, int tokenCount, int vo
         public Tensor<float> ExpectedLogits { get; } = New([B, T, V], s);
     }
 
-    public static unsafe void Test(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
+    public static unsafe void VerifyTrain(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
     {
         // build the GPT-2 model from a checkpoint
         using var model = ModelFromCheckpoint(dataDirectory + ModelBinaryFileName);
23 changes: 13 additions & 10 deletions src/Llm/Gpt2.cs
@@ -42,29 +42,31 @@ public sealed class Model(Config config) : IDisposable
     // other run state configuration
     public int Batchsize = 0; // the batch size (B) of current forward pass
     public int TokenCount = 0; // the sequence length (T) of current forward pass
+    public int MaxTokenCount = 0; // the max token count in output tensors
 
     [MemberNotNull(nameof(Outputs))]
-    public void EnsureOutputMemory(int B, int T)
+    public void EnsureOutputMemory(int B, int T, int maxT)
     {
         // allocate space for all the outputs if needed (done here, lazily)
         if (Outputs is null)
         {
-            // record the current B,T as well
-            Batchsize = B;
-            TokenCount = T;
-            Outputs = OutputTensors.Create(B, T, Config);
-            Log($"OutputCount: {Outputs.TotalCount}");
+            Outputs = OutputTensors.Create(B, maxT, Config);
+            MaxTokenCount = maxT;
+            Log($"OutputCount: {Outputs.TotalCount} (allocated based on max token count {maxT})");
         }
         else
         {
             // validate B,T is no larger than what was previously allocated
             // in principle, we could re-allocate a larger chunk of memory, for now we just error output
-            if (B > Batchsize || T > TokenCount)
+            if (B > Batchsize || T > MaxTokenCount)
             {
                 throw new InvalidDataException("Batch size or token count is inadequately large" +
-                    $"Model: B={Batchsize} T={TokenCount}, Desired: B={B} T={T}");
+                    $"Model: B={Batchsize} MaxT={MaxTokenCount}, Desired: B={B} T={T}");
             }
         }
+        // record the current B,T (TODO: REVISE)
+        Batchsize = B;
+        TokenCount = T;
     }
 
     public void Dispose()
@@ -232,7 +234,7 @@ internal static unsafe TrainStepResult TrainStep(Model model,
     }
 
     static unsafe float Forward(Model model, int* inputs,
-        int* targetTokenIndices, int B, int T, TimeLlm llm)
+        int* targetTokenIndices, int B, int T, TimeLlm llm, int? allocateTokenCount = null)
     {
         // targetTokenIndices are optional and could be null
 
@@ -248,7 +250,7 @@ static unsafe float Forward(Model model, int* inputs,
         int H = model.Config.HeadCount;
         int C = model.Config.ChannelCount;
 
-        model.EnsureOutputMemory(B, T);
+        allocateTokenCount ??= T;
+        model.EnsureOutputMemory(B, T, allocateTokenCount.Value);
 
         llm.Part = "0." + nameof(Forward);
         llm.Index = -1;
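Taken together, the Gpt2.cs changes exist to support the interactive loop in Gpt2.Infer.cs: output tensors are allocated lazily on the first Forward call, so without the new maxT argument they would be sized to the short prompt and every longer follow-up pass would fail the validation check. A hedged sketch of the resulting call pattern, with promptTokenCount as a hypothetical value and the other names taken from the diff:

// Sketch, not repository code: the first call allocates Outputs for maxTokenCount tokens,
// later calls with a larger T only validate against the recorded MaxTokenCount.
model.EnsureOutputMemory(B: 1, T: promptTokenCount, maxT: maxTokenCount);     // allocates once
model.EnsureOutputMemory(B: 1, T: promptTokenCount + 1, maxT: maxTokenCount); // ok, no reallocation
// A call with T greater than maxTokenCount would throw InvalidDataException instead.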
3 changes: 2 additions & 1 deletion src/Llm/Runner.cs
@@ -26,8 +26,9 @@ public static void Run(string[] args, string dataDirectory, Action<string> log)
         using var logWriter = new StreamWriter(logFilePath);
         Action<string> newLog = t => { log(t); logWriter.WriteLine(t); };
 
+        //Gpt2.Infer(dataDirectory, llm, newLog);
         const int steps = 10;
-        Gpt2.Test(dataDirectory, llm, steps, newLog);
+        Gpt2.VerifyTrain(dataDirectory, llm, steps, newLog);
         //Gpt2.Train(dataDirectory, llm);
     }
 
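Runner.cs now carries the new entry point as a commented-out line: uncomment Gpt2.Infer (and comment out Gpt2.VerifyTrain) to switch from the training verification run to the interactive prompt loop. A usage sketch, assuming a console host and a placeholder data directory holding the GPT-2 checkpoint and gpt2.tiktoken files:

// Sketch only: "path/to/data/" is a placeholder; Runner.Run uses the ILlm instance
// set up earlier in Run (outside this diff) and mirrors log output to a file via newLog.
Runner.Run(args, "path/to/data/", Console.WriteLine);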

