diff --git a/LLama.Unittest/BeamTests.cs b/LLama.Unittest/BeamTests.cs
deleted file mode 100644
index 88b25672e..000000000
--- a/LLama.Unittest/BeamTests.cs
+++ /dev/null
@@ -1,73 +0,0 @@
-﻿using System.Text;
-using LLama.Common;
-using LLama.Native;
-using Xunit.Abstractions;
-
-namespace LLama.Unittest;
-
-public sealed class BeamTests
-    : IDisposable
-{
-    private readonly ITestOutputHelper _testOutputHelper;
-    private readonly ModelParams _params;
-    private readonly LLamaWeights _model;
-
-    public BeamTests(ITestOutputHelper testOutputHelper)
-    {
-        _testOutputHelper = testOutputHelper;
-        _params = new ModelParams(Constants.GenerativeModelPath)
-        {
-            ContextSize = 2048,
-            GpuLayerCount = Constants.CIGpuLayerCount,
-        };
-        _model = LLamaWeights.LoadFromFile(_params);
-    }
-
-    public void Dispose()
-    {
-        _model.Dispose();
-    }
-
-    [Fact]
-    public void BasicBeam()
-    {
-        const int num_beams = 2;
-        const int n_predict = 3;
-        const string prompt = "The cat sat on";
-
-        var context = _model.CreateContext(_params);
-
-        var initial_tokens = context.Tokenize(prompt);
-        var batch = new LLamaBatch();
-        batch.AddRange(initial_tokens, 0, LLamaSeqId.Zero, true);
-        context.Decode(batch);
-
-        var decoder = new StreamingTokenDecoder(context);
-        NativeApi.llama_beam_search(context.NativeHandle, (data, state) =>
-        {
-            // Show the current state of every beam.
-            for (var i = 0; i < state.Beams.Length; i++)
-            {
-                ref var view = ref state.Beams[i];
-
-                var decoder = new StreamingTokenDecoder(context);
-                decoder.AddRange(view.Tokens);
-                var tokens = decoder.Read();
-
-                _testOutputHelper.WriteLine($"B{i} ({view.CumulativeProbability}) => '{tokens}'");
-            }
-
-            // Once all beams agree on some tokens read them and append them to the output decoder
-            if (state.CommonPrefixLength > 0)
-            {
-                var view = state.Beams[0];
-
-                decoder.AddRange(view.Tokens.Slice(0, (int)state.CommonPrefixLength));
-                
-            }
-
-        }, IntPtr.Zero, num_beams, initial_tokens.Length, n_predict, Math.Max(1, Environment.ProcessorCount / 2));
-
-        _testOutputHelper.WriteLine($"Final: {prompt}{decoder.Read()}");
-    }
-}
\ No newline at end of file
diff --git a/LLama/Native/LLamaBeamView.cs b/LLama/Native/LLamaBeamView.cs
deleted file mode 100644
index dcd583ba3..000000000
--- a/LLama/Native/LLamaBeamView.cs
+++ /dev/null
@@ -1,40 +0,0 @@
-﻿using System;
-using System.Runtime.InteropServices;
-
-namespace LLama.Native;
-
-/// <summary>
-/// Information about a single beam in a beam search
-/// </summary>
-[StructLayout(LayoutKind.Sequential)]
-public struct LLamaBeamView
-{
-    private unsafe LLamaToken* tokens;
-    private nuint n_tokens;
-
-    /// <summary>
-    /// Cumulative beam probability (renormalized relative to all beams)
-    /// </summary>
-    public float CumulativeProbability;
-
-    /// <summary>
-    /// Callback should set this to true when a beam is at end-of-beam.
-    /// </summary>
-    public bool EndOfBeam;
-
-    /// <summary>
-    /// Tokens in this beam
-    /// </summary>
-    public readonly Span<LLamaToken> Tokens
-    {
-        get
-        {
-            unsafe
-            {
-                if (n_tokens > int.MaxValue)
-                    throw new InvalidOperationException("More than 2147483647 tokens is not supported");
-                return new Span<LLamaToken>(tokens, (int)n_tokens);
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/LLama/Native/LLamaBeamsState.cs b/LLama/Native/LLamaBeamsState.cs
deleted file mode 100644
index cb214aef3..000000000
--- a/LLama/Native/LLamaBeamsState.cs
+++ /dev/null
@@ -1,49 +0,0 @@
-﻿using System;
-using System.Runtime.InteropServices;
-
-namespace LLama.Native;
-
-/// <summary>
-/// Passed to beam_search_callback function.
-/// Whenever 0 &lt; common_prefix_length, this number of tokens should be copied from any of the beams
-/// (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks.
-/// </summary>
-[StructLayout(LayoutKind.Sequential)]
-public struct LLamaBeamsState
-{
-    /// <summary>
-    /// The state of each individual beam
-    /// </summary>
-    private unsafe LLamaBeamView* beam_views;
-
-    /// <summary>
-    /// Number of elements in beam_views
-    /// </summary>
-    private nuint n_beams;
-
-    /// <summary>
-    /// Current max length of prefix tokens shared by all beams.
-    /// </summary>
-    public ulong CommonPrefixLength;
-
-    /// <summary>
-    /// True iff this is the last callback invocation.
-    /// </summary>
-    public bool LastCall;
-
-    /// <summary>
-    /// The current state of each beam
-    /// </summary>
-    public Span<LLamaBeamView> Beams
-    {
-        get
-        {
-            unsafe
-            {
-                if (n_beams > int.MaxValue)
-                    throw new InvalidOperationException("More than 2147483647 beams is not supported");
-                return new Span<LLamaBeamView>(beam_views, (int)n_beams);
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/LLama/Native/NativeApi.BeamSearch.cs b/LLama/Native/NativeApi.BeamSearch.cs
deleted file mode 100644
index 142b997bb..000000000
--- a/LLama/Native/NativeApi.BeamSearch.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-﻿using System;
-using System.Runtime.InteropServices;
-
-namespace LLama.Native;
-
-public static partial class NativeApi
-{
-    /// <summary>
-    /// Type of pointer to the beam_search_callback function.
-    /// </summary>
-    /// <param name="callback_data">callback_data is any custom data passed to llama_beam_search, that is subsequently passed back to beam_search_callbac</param>
-    /// <param name="state"></param>
-    public delegate void LLamaBeamSearchCallback(IntPtr callback_data, LLamaBeamsState state);
-
-    /// <summary>Deterministically returns entire sentence constructed by a beam search.</summary>
-    /// <param name="ctx">Pointer to the llama_context.</param>
-    /// <param name="callback">Invoked for each iteration of the beam_search loop, passing in beams_state.</param>
-    /// <param name="callback_data">A pointer that is simply passed back to callback.</param>
-    /// <param name="n_beams">Number of beams to use.</param>
-    /// <param name="n_past">Number of tokens already evaluated.</param>
-    /// <param name="n_predict">Maximum number of tokens to predict. EOS may occur earlier.</param>
-    /// <param name="n_threads">Number of threads.</param>
-    [DllImport(libraryName, EntryPoint = "llama_beam_search", CallingConvention = CallingConvention.Cdecl)]
-    public static extern void llama_beam_search(SafeLLamaContextHandle ctx, LLamaBeamSearchCallback callback, IntPtr callback_data, ulong n_beams, int n_past, int n_predict, int n_threads);
-}
\ No newline at end of file