# Solution
import heapq
import numpy as np
class BeamSearch:
    """Beam search decoder over a next-token probability model.

    ``model`` is any callable mapping a token sequence to an iterable of
    ``(token, probability)`` pairs describing the next-step distribution.
    """

    def __init__(self, model, beam_width=3, max_length=10, end_token=""):
        self.model = model            # Callable: sequence -> [(token, prob), ...]
        self.beam_width = beam_width  # Number of beams kept after each step
        self.max_length = max_length  # Maximum number of expansion steps
        # End-of-sequence marker; was hard-coded as "" — now parameterized
        # (default preserves the old behavior for existing callers).
        self.end_token = end_token

    def search(self, start_token):
        """Return the highest-probability sequence starting from ``start_token``.

        Log-probabilities are accumulated instead of raw products to avoid
        floating-point underflow on long sequences.
        """
        # Each beam is a (cumulative_log_probability, token_sequence) pair.
        sequences = [(0.0, [start_token])]
        for _ in range(self.max_length):
            # Early exit: every surviving beam has already terminated,
            # so further iterations would be no-ops.
            if all(seq[-1] == self.end_token for _, seq in sequences):
                break
            all_candidates = []
            for log_prob, seq in sequences:
                # A beam that produced the end token is carried over unchanged.
                if seq[-1] == self.end_token:
                    all_candidates.append((log_prob, seq))
                    continue
                # Expand the beam with every candidate next token.
                for token, prob in self.model(seq):
                    all_candidates.append((log_prob + np.log(prob), seq + [token]))
            # Prune to the top beam_width candidates by log-probability.
            sequences = heapq.nlargest(self.beam_width, all_candidates,
                                       key=lambda cand: cand[0])
        # nlargest returns candidates best-first, so index 0 is the winner.
        return sequences[0][1]
# Example Model (Simulated)
def dummy_model(sequence):
    """Toy stand-in for a language model: random next-token distribution.

    Ignores *sequence* and draws a fresh categorical distribution over a
    tiny fixed vocabulary ("" acts as the end-of-sequence token).
    """
    vocabulary = ("hello", "world", "")
    weights = np.random.dirichlet(np.ones(len(vocabulary)))
    return [(token, weight) for token, weight in zip(vocabulary, weights)]
# Example usage.
# BUG FIX: the original passed start_token="", which is also the end token,
# so search() treated the starting beam as already finished and returned [""]
# without ever querying the model. Start from a distinct sentinel instead.
if __name__ == "__main__":
    beam_search = BeamSearch(dummy_model, beam_width=3, max_length=5)
    best_sequence = beam_search.search(start_token="<s>")
    print("Best Sequence:", best_sequence)