fix multinomial sampling (#1228)
* fix multinomial sampling kernel dtypes

* fix repetition penalty dtype cast

---------

Co-authored-by: grimoire <[email protected]>
grimoire authored Mar 3, 2024
1 parent f0dabee commit 79ac87b
Showing 3 changed files with 13 additions and 4 deletions.
lmdeploy/pytorch/engine/logits_process.py (2 additions, 0 deletions)
@@ -12,6 +12,7 @@ def _process_temperature(scores: torch.Tensor,
                          temperature: torch.Tensor,
                          inplace: bool = True):
     """process temperature."""
+    temperature = temperature.to(scores.dtype)
     if not inplace:
         scores = scores / temperature[:, None]
     else:
@@ -42,6 +43,7 @@ def _process_repetition_penalty(scores: torch.Tensor,
                                 inplace: bool = True):
     """process repetition penalty."""
     score = torch.gather(scores, 1, input_ids)
+    penalty = penalty.to(score.dtype)
     score = torch.where(score < 0, score * penalty[:, None],
                         score / penalty[:, None])
     if not inplace:
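Both changes in this file cast the per-sequence sampling parameter to the logits dtype before doing the arithmetic. A minimal standalone sketch of the temperature path (the function name and shapes here are illustrative, not the lmdeploy API) shows why the cast matters once the logits are fp16/bf16 while the parameter tensor defaults to fp32:

```python
import torch


def apply_temperature(scores: torch.Tensor,
                      temperature: torch.Tensor,
                      inplace: bool = True) -> torch.Tensor:
    """Scale a (batch, vocab) logits tensor by a per-sequence temperature."""
    # Mirror the patch: cast first so an fp32 temperature neither promotes
    # fp16/bf16 logits nor breaks the in-place division below.
    temperature = temperature.to(scores.dtype)
    if not inplace:
        scores = scores / temperature[:, None]
    else:
        scores /= temperature[:, None]
    return scores


logits = torch.randn(4, 128, dtype=torch.half)
temperature = torch.full((4, ), 0.7)  # float32 by default
apply_temperature(logits, temperature)  # without the cast, the in-place path can raise a dtype error
```

The repetition-penalty change is the same idea applied to `penalty` before the `torch.where` arithmetic.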
lmdeploy/pytorch/kernels/multinomial_sampling.py (3 additions, 3 deletions)
@@ -22,7 +22,7 @@ def _multinomial_sampling_kernel(Scores, Seeds, Offsets, Indices, Outputs,
 
     samp = tl.rand(seed, offset)[:, None]
     acc = tl.zeros((BLOCK, ), dtype=Scores.dtype.element_ty)
-    output = tl.full((BLOCK, ), -1, dtype=tl.int64)
+    output = tl.full((BLOCK, ), -1, dtype=Outputs.dtype.element_ty)
 
     for b_idx in range(0, num_tokens, BLOCK_N):
         s_off = b_idx + n_off
@@ -31,8 +31,8 @@ def _multinomial_sampling_kernel(Scores, Seeds, Offsets, Indices, Outputs,
                          s_off[None, :] * stride_st,
                          mask=s_mask,
                          other=0.0)
-        cum_scores = acc[:, None] + tl.cumsum(scores, 1)
-        acc += tl.sum(scores, 1)
+        cum_scores = acc[:, None] + tl.cumsum(scores, 1).to(acc.dtype)
+        acc += tl.sum(scores, 1).to(acc.dtype)
 
         pre_cum_scores = cum_scores - scores
         valid_mask = (samp > pre_cum_scores) & (samp <= cum_scores)
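The kernel change keeps the output buffer, the running accumulator, and the blockwise cumulative sums in consistent dtypes. A rough PyTorch reference of the same inverse-CDF sampling idea (this is not the Triton kernel; the names, seeding scheme, and the single fp32 upcast are illustrative choices):

```python
import torch


def multinomial_sample_ref(scores: torch.Tensor, indices: torch.Tensor,
                           seed: int = 0) -> torch.Tensor:
    """Pick one column per row with probability proportional to `scores`,
    then return the corresponding entry of `indices`."""
    # The Triton kernel accumulates in the score dtype and casts the block
    # cumsum/sum back to the accumulator dtype; this reference simply
    # upcasts once to fp32 so fp16/bf16 inputs behave the same way.
    cum = torch.cumsum(scores.to(torch.float32), dim=1)
    gen = torch.Generator(device=scores.device).manual_seed(seed)
    samp = torch.rand(scores.size(0), 1, generator=gen, device=scores.device)
    samp = samp * cum[:, -1:]  # scale the uniform sample into the CDF range
    # first position whose cumulative mass reaches the sample
    pos = torch.searchsorted(cum, samp).clamp_max(scores.size(1) - 1)
    return torch.gather(indices, 1, pos).squeeze(1)


scores = torch.zeros(2, 8, dtype=torch.half)
scores[torch.arange(2), torch.tensor([4, 2])] = 1  # one-hot rows, as in the test
indices = torch.arange(8).repeat(2, 1)
print(multinomial_sample_ref(scores, indices))  # tensor([4, 2])
```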
tests/pytorch/kernel/test_multinomial_sampling.py (8 additions, 1 deletion)
@@ -19,10 +19,15 @@ def batch_size(self, select_ids):
         yield len(select_ids)
 
     @pytest.fixture
-    def scores(self, num_tokens, batch_size, select_ids):
+    def dtype(self, request):
+        yield request.param
+
+    @pytest.fixture
+    def scores(self, num_tokens, batch_size, select_ids, dtype):
         ret = torch.zeros(batch_size, num_tokens).cuda()
         batch_ids = torch.arange(batch_size).cuda()
         ret[batch_ids, select_ids] = 1
+        ret = ret.to(dtype)
         yield ret
 
     @pytest.fixture
@@ -45,6 +50,8 @@ def gt(self, batch_size, select_ids, indices):
         batch_ids = torch.arange(batch_size).cuda()
         yield indices[batch_ids, select_ids]
 
+    @pytest.mark.parametrize('dtype',
+                             [torch.float32, torch.half, torch.bfloat16])
     @pytest.mark.parametrize(['num_tokens', 'select_ids'], [
         (8, (4, 2) * 30),
         (200, (50, 150)),
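The test now sweeps the kernel over fp32, fp16, and bf16 scores via the new `dtype` fixture. A simplified, self-contained sketch of that fixture/parametrize pattern (the class and test names here are made up, and `indirect=True` is used to make the routing through `request.param` explicit):

```python
import pytest
import torch


class TestDtypeParametrization:

    @pytest.fixture
    def dtype(self, request):
        # receives each parametrized value through request.param
        yield request.param

    @pytest.fixture
    def scores(self, dtype):
        yield torch.zeros(4, 8, dtype=dtype)

    @pytest.mark.parametrize('dtype',
                             [torch.float32, torch.half, torch.bfloat16],
                             indirect=True)
    def test_scores_dtype(self, scores, dtype):
        # the dependent fixture sees the same dtype as the test
        assert scores.dtype == dtype
```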
