bug-fix: always use stream mode to enable persistent batching
sleepwalker committed Sep 4, 2023
1 parent eaccbc0 commit 0770aaf
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions lmdeploy/serve/openai/api_server.py
@@ -278,7 +278,7 @@ async def generate(request: GenerateRequest, raw_request: Request = None):
     generation = VariableInterface.async_engine.generate(
         request.prompt,
         request.instance_id,
-        stream_response=request.stream,
+        stream_response=True,  # always use stream to enable batching
         sequence_start=request.sequence_start,
         sequence_end=request.sequence_end,
         request_output_len=request.request_output_len,
@@ -303,12 +303,14 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
         return StreamingResponse(stream_results())
     else:
         ret = {}
+        text = ''
+        tokens = 0
+        finish_reason = None
         async for out in generation:
-            ret = {
-                'text': out.response,
-                'tokens': out.generate_token_len,
-                'finish_reason': out.finish_reason
-            }
+            text += out.response
+            tokens += out.generate_token_len
+            finish_reason = out.finish_reason
+        ret = {'text': text, 'tokens': tokens, 'finish_reason': finish_reason}
         return JSONResponse(ret)
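
For context, a minimal client-side sketch of what the non-streaming path returns after this change: the server always streams from the async engine internally (so the request can join the persistent batch), and for stream=False it aggregates the chunks into a single JSON reply. The /generate route path, host, port, and field values below are assumptions for illustration, not taken from this diff.

    # Hypothetical client for the non-streaming path; the endpoint URL and
    # request values are assumptions -- adjust to the running api_server.
    import requests

    resp = requests.post(
        'http://0.0.0.0:23333/generate',
        json={
            'prompt': 'Hello, how are you?',
            'instance_id': 1,           # hypothetical session id
            'sequence_start': True,
            'sequence_end': True,
            'request_output_len': 64,
            'stream': False,            # server still streams internally, then aggregates
        },
    )
    # Expected shape per the diff: {'text': ..., 'tokens': ..., 'finish_reason': ...}
    print(resp.json())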

