Commit 9b4c013

Add Gpt2.Infer with Prompt example incl. tokenization encode/decode (#22)
nietras authored Jul 24, 2024
1 parent 755fa13 commit 9b4c013
Showing 5 changed files with 80 additions and 15 deletions.
63 changes: 63 additions & 0 deletions src/Llm/Gpt2.Infer.cs
@@ -0,0 +1,63 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace nietras.LargeLanguageModel;

static partial class Gpt2
{
    public static unsafe void Infer(string dataDirectory, ILlm llmToUse,
        Action<string>? log)
    {
        // build the GPT-2 model from a checkpoint
        using var model = ModelFromCheckpoint(dataDirectory + ModelBinaryFileName);
        // Arbitrarily limit max tokens here for now
        var maxTokenCount = Math.Min(256, model.Config.MaxTokenCount);

        var tokenizer = Bpe.ReadGpt2FromTiktokenFile(dataDirectory + TokenizerTiktokenFileName);

        var stopwatch = new Stopwatch();
        var llm = CreateTimeLlm(llmToUse);
        var promptTokenIndices = new List<int>();

        int* tokenIndices = stackalloc int[maxTokenCount];
        var tokenIndicesSpan = new Span<int>(tokenIndices, maxTokenCount);
        int tokenCount = 0;

        // some memory for generating samples from the model
        ulong randomNumberState = 1337;

        while (true)
        {
            LogNoNewLine("Prompt: ");
            var line = Console.ReadLine();
            if (line == null) { continue; }

            promptTokenIndices.Clear();
            tokenizer.Encode(line, promptTokenIndices);

            var tokensToCopy = Math.Min(maxTokenCount - 1, promptTokenIndices.Count);
            CollectionsMarshal.AsSpan(promptTokenIndices).Slice(0, tokensToCopy).CopyTo(tokenIndicesSpan);
            tokenIndicesSpan[tokensToCopy] = EndOfTextTokenIndex;
            tokenCount = tokensToCopy + 1;

            Log($"Prompt (encode-decode): {tokenizer.TryDecode(tokenIndicesSpan.Slice(0, tokenCount))}");
            while (tokenCount < maxTokenCount)
            {
                // note that inference is wasteful here because for each t,
                // we re-compute all activations between 0 and t
                Forward(model, tokenIndices, null, 1, tokenCount, llm, maxTokenCount);

                float* probabilities = model.Outputs!.Probabilities.Ptr + (tokenCount - 1) * model.Config.VocabularySize;
                float coin = RandomSingle(&randomNumberState);
                int nextToken = FindSampleIndex(probabilities, model.Config.VocabularySize, coin);
                tokenIndices[tokenCount] = nextToken;
                ++tokenCount;
                var output = tokenizer.TryDecode([nextToken]) ?? string.Empty;
                LogNoNewLine(output);
            }
            Log(string.Empty);
        }
    }
}
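In the loop above, RandomSingle advances randomNumberState to produce a uniform value in [0, 1), and FindSampleIndex turns that value into a token index from the vocabulary's probability distribution. Both helpers are defined elsewhere in the repository; the snippet below is only a sketch of the usual cumulative-probability sampling they imply, not the repository's implementation.

// Sketch only: maps a uniform "coin" in [0, 1) onto a categorical distribution,
// such as the VocabularySize probabilities Infer reads for the last position.
static int SampleTokenIndex(ReadOnlySpan<float> probabilities, float coin)
{
    float cumulative = 0f;
    for (int i = 0; i < probabilities.Length; i++)
    {
        cumulative += probabilities[i];
        if (coin < cumulative) { return i; }
    }
    // Floating-point rounding can leave the sum slightly below 1; fall back to the last index.
    return probabilities.Length - 1;
}

The index returned this way is what Infer appends to tokenIndices and immediately decodes back to text with tokenizer.TryDecode.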
4 changes: 1 addition & 3 deletions src/Llm/Gpt2.Train.cs
@@ -12,7 +12,6 @@ static partial class Gpt2
     internal const string ModelDebugBinaryFileName = "gpt2_124M_debug_state.bin";
 
     internal const string TokenizerTiktokenFileName = "gpt2.tiktoken";
-    internal const string TokenizerBinaryFileName = "gpt2_tokenizer.bin";
 
     internal const string DataTinyStoriesTrainBinaryFileName = "TinyStories_train.bin";
     internal const string DataTinyStoriesValidationBinaryFileName = "TinyStories_val.bin";
@@ -23,7 +22,6 @@ static partial class Gpt2
     internal static readonly IReadOnlyList<string> FileNames = [
         ModelBinaryFileName,
         ModelDebugBinaryFileName,
-        TokenizerBinaryFileName,
         //DataTinyStoriesTrainBinaryFileName,
         //DataTinyStoriesValidationBinaryFileName,
         TinyShakespeareTrainBinaryFileName,
@@ -49,7 +47,7 @@ public static unsafe void Train(string dataDirectory, ILlm llmToUse)
         var trainTokens = File.Exists(tinyShakespeareTrain) ? tinyShakespeareTrain : tinyStoriesTrain;
         var valTokens = File.Exists(tinyShakespeareValidation) ? tinyShakespeareValidation : tinyStoriesValidation;
         int b = 4; // batch size 4 (i.e. 4 independent token sequences will be trained on)
-        int t = 64; // sequence length 64 (i.e. each sequence is 64 tokens nint). must be <= maxT, which is 1024 for GPT-2
+        int t = 64; // sequence length 64 (i.e. each sequence is 64 tokens int). must be <= maxT, which is 1024 for GPT-2
         using DataLoader trainLoader = new(trainTokens, b, t);
         Log($"Train dataset BatchCount: {trainLoader.BatchCount}");

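For scale, these training settings mean each step consumes b × t = 4 × 64 = 256 tokens, comfortably inside the 1024-token GPT-2 context. A minimal sanity check, using only the values visible in the diff:

// Sketch only: values taken from the comments in Gpt2.Train above.
int b = 4, t = 64, maxT = 1024;
int tokensPerStep = b * t;      // 256 tokens per forward/backward pass
bool withinContext = t <= maxT; // true: sequence length stays under the GPT-2 limit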
2 changes: 1 addition & 1 deletion src/Llm/Gpt2.Test.cs → src/Llm/Gpt2.VerifyTrain.cs
@@ -30,7 +30,7 @@ public static ExpectedOutputTensors Create(int batchSize, int tokenCount, int vo
         public Tensor<float> ExpectedLogits { get; } = New([B, T, V], s);
     }
 
-    public static unsafe void Test(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
+    public static unsafe void VerifyTrain(string dataDirectory, ILlm llmToUse, int steps, Action<string>? log)
     {
         // build the GPT-2 model from a checkpoint
         using var model = ModelFromCheckpoint(dataDirectory + ModelBinaryFileName);
23 changes: 13 additions & 10 deletions src/Llm/Gpt2.cs
@@ -42,29 +42,31 @@ public sealed class Model(Config config) : IDisposable
     // other run state configuration
     public int Batchsize = 0; // the batch size (B) of current forward pass
     public int TokenCount = 0; // the sequence length (T) of current forward pass
+    public int MaxTokenCount = 0; // the max token count in output tensors
 
     [MemberNotNull(nameof(Outputs))]
-    public void EnsureOutputMemory(int B, int T)
+    public void EnsureOutputMemory(int B, int T, int maxT)
     {
         // allocate space for all the outputs if needed (done here, lazily)
         if (Outputs is null)
         {
-            // record the current B,T as well
-            Batchsize = B;
-            TokenCount = T;
-            Outputs = OutputTensors.Create(B, T, Config);
-            Log($"OutputCount: {Outputs.TotalCount}");
+            Outputs = OutputTensors.Create(B, maxT, Config);
+            MaxTokenCount = maxT;
+            Log($"OutputCount: {Outputs.TotalCount} (allocated based on max token count {maxT})");
         }
         else
         {
             // validate B,T is no larger than what was previously allocated
             // in principle, we could re-allocate a larger chunk of memory, for now we just error output
-            if (B > Batchsize || T > TokenCount)
+            if (B > Batchsize || T > MaxTokenCount)
             {
                 throw new InvalidDataException("Batch size or token count is inadequately large" +
-                    $"Model: B={Batchsize} T={TokenCount}, Desired: B={B} T={T}");
+                    $"Model: B={Batchsize} MaxT={MaxTokenCount}, Desired: B={B} T={T}");
             }
         }
+        // record the current B,T (TODO: REVISE)
+        Batchsize = B;
+        TokenCount = T;
     }
 
     public void Dispose()
@@ -232,7 +234,7 @@ internal static unsafe TrainStepResult TrainStep(Model model,
     }
 
     static unsafe float Forward(Model model, int* inputs,
-        int* targetTokenIndices, int B, int T, TimeLlm llm)
+        int* targetTokenIndices, int B, int T, TimeLlm llm, int? allocateTokenCount = null)
     {
         // targetTokenIndices are optional and could be null
 
@@ -248,7 +250,7 @@ static unsafe float Forward(Model model, int* inputs,
         int H = model.Config.HeadCount;
         int C = model.Config.ChannelCount;
 
-        model.EnsureOutputMemory(B, T);
+        allocateTokenCount ??= T;
+        model.EnsureOutputMemory(B, T, allocateTokenCount.Value);
 
         llm.Part = "0." + nameof(Forward);
         llm.Index = -1;
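Taken together, the Gpt2.cs changes exist to support the interactive loop in Gpt2.Infer.cs: output tensors are allocated lazily on the first Forward call, so without the new maxT argument they would be sized to the short prompt and every longer follow-up pass would fail the validation check. A hedged sketch of the resulting call pattern, with promptTokenCount as a hypothetical value and the other names taken from the diff:

// Sketch, not repository code: the first call allocates Outputs for maxTokenCount tokens,
// later calls with a larger T only validate against the recorded MaxTokenCount.
model.EnsureOutputMemory(B: 1, T: promptTokenCount, maxT: maxTokenCount);     // allocates once
model.EnsureOutputMemory(B: 1, T: promptTokenCount + 1, maxT: maxTokenCount); // ok, no reallocation
// A call with T greater than maxTokenCount would throw InvalidDataException instead.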
3 changes: 2 additions & 1 deletion src/Llm/Runner.cs
@@ -26,8 +26,9 @@ public static void Run(string[] args, string dataDirectory, Action<string> log)
         using var logWriter = new StreamWriter(logFilePath);
         Action<string> newLog = t => { log(t); logWriter.WriteLine(t); };
 
+        //Gpt2.Infer(dataDirectory, llm, newLog);
         const int steps = 10;
-        Gpt2.Test(dataDirectory, llm, steps, newLog);
+        Gpt2.VerifyTrain(dataDirectory, llm, steps, newLog);
         //Gpt2.Train(dataDirectory, llm);
     }
 
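Runner.cs now carries the new entry point as a commented-out line: uncomment Gpt2.Infer (and comment out Gpt2.VerifyTrain) to switch from the training verification run to the interactive prompt loop. A usage sketch, assuming a console host and a placeholder data directory holding the GPT-2 checkpoint and gpt2.tiktoken files:

// Sketch only: "path/to/data/" is a placeholder; Runner.Run uses the ILlm instance
// set up earlier in Run (outside this diff) and mirrors log output to a file via newLog.
Runner.Run(args, "path/to/data/", Console.WriteLine);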

