diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5cd7f41 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + check-n-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + + - name: Formatter + run: | + ruff format --check + + - name: Unit tests + run: | + pytest lone_arena/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fdfc8c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/data/ + +__pycache__/ +.pytest_cache/ +.ruff_cache/ +.vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a768a1f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.14 + hooks: + - id: ruff-format diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b7e6d9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Contextualist + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6fd05c9 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ + + +# Lone Arena + +When comparing two LLM checkpoints, human evaluation could be tedious. +Let's strip down the evaluation process to just a single question: + +![lone_arena-ui-en](media/lone_arena-ui-en.png) + +Press f or j to choose the winner of each match. + +Inspired by [Chatbot Arena](https://chat.lmsys.org). + +## Get Started + +1. In a Python (>= 3.12) environment, `pip install -r requirements.txt` +2. Fill out `config.toml` with your model endpoint infomation and prompts. See [`config-example.toml`](config-example.toml). +3. Run `python generate.py config.toml` to gather responses from models. +4. Run `python evaluate.py config.toml` to host your competition! + + +## Approach + +Two models/checkpoints are compared by anonymous evaluation of their responses to the same prompt. For each prompt: + +1. For each model, generate 8 sample responses. Run a single-elimination tournament to get top 3 responses. (8 matches x 2 models) +2. Let the best responses of two models compete, then 2nd best of two models, then 3rd best. Winner of each gets 4.8, 3.2, 2.0 points, respectively. 
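As a back-of-envelope check (a sketch assuming the default `sample = 8`), the per-prompt workload is:

```python
samples = 8                      # responses generated per model per prompt
bracket = samples - 1            # single-elimination matches for 8 entrants
third_place = 1                  # semifinal losers play one match for the 3rd spot
finals = 3                       # best vs best, 2nd vs 2nd, 3rd vs 3rd
print(2 * (bracket + third_place) + finals)   # 19 human judgements per prompt
```

The cross-model finals are the only matches where the two models meet directly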
(3 matches) + +Matches are shuffled. +Number of samples and points are configurable. +In the future, I might implement [Elo](https://en.wikipedia.org/wiki/Elo_rating_system) for comparing multiple models. + +## Develop + +```bash +pip install -r requirements-dev.txt +pre-commit install +``` diff --git a/config-example.toml b/config-example.toml new file mode 100644 index 0000000..e8054a4 --- /dev/null +++ b/config-example.toml @@ -0,0 +1,33 @@ +data_dir = "./data" +sample = 8 + +[[model]] +# Entries other than `name` are passed to `openai.OpenAI` or `openai.OpenAI.chat.completion.create` +name = "chatglm3-6b-sft-checkpoint100" +base_url = "http://localhost:8000/v1" +api_key = "dummy-key" +max_retries = 1 +model = "" + +[[model]] +name = "gpt-3.5" +api_key = "sk-..." +max_retries = 1 +model = "gpt-3.5-turbo" + + +[[prompt]] +name = "demo-sleep-zh" +chat = """ +user: 睡不着怎么办? +assistant: 你可以试试给我讲个故事呀。 +user: 诶嘿,那你想听什么样的故事呢? +""" + +[[prompt]] +name = "demo-sleep-en" +chat = """ +user: What should I do if I can't fall asleep? +assistant: Hmmm, maybe you can try telling me a story. +user: lol, what kind of story do you like? +""" diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 0000000..eba7c40 --- /dev/null +++ b/evaluate.py @@ -0,0 +1,151 @@ +from lone_arena.chatcup import Top3_1v1 +from lone_arena.config import load_config, Config +from lone_arena.files import DocumentDir, ResultDir +from lone_arena.format import format_conversation + +import gradio as gr + +from queue import Queue +import argparse +from functools import partial + +req_queue = Queue() +rsp_queue = Queue() +prog_notify = Queue() + + +def main(conf: Config): + def compete(a, b): + ca, cb = ( + format_conversation(docs.get(a, [])), + format_conversation(docs.get(b, [])), + ) + req_queue.put((ca, cb)) + result = rsp_queue.get() + if result == 0: + return a, b + return b, a + + mnames = [x.name for x in conf.model] + pnames = [x.name for x in conf.prompt] + docsd = DocumentDir(conf.data_dir) + docs = docsd.load(pnames, mnames) + + resultd = ResultDir(conf.data_dir) + podiums, pnames_todo = resultd.load(pnames) + if podiums: + if pnames_todo: + print("Partially completed, resuming...") + else: + msg = f"Loaded completed result. To re-evaluate, remove results from {resultd.result_dir}" + print(msg) + req_queue.put((msg, "")) + + cup = Top3_1v1(pnames_todo, mnames, conf.sample, conf.top3_scores) + todo_match, total_match = cup.nmatch(), cup.nmatch(len(pnames)) + prog_notify.put((total_match - todo_match, total_match)) + itournament = cup.run(compete) + for pname, podium in zip(pnames_todo, itournament): + podiums.append(podium) + resultd.dump(podium, docs, pname) + msg = f"End of evaluation. 
Winning responses can be found in {resultd.result_dir}" + req_queue.put((msg, "")) + + score_weights = [p.score_weight for p in conf.prompt] + result = cup.tabulate_result(podiums, score_weights) + return gr.DataFrame(visible=True, value=result) + + +def init(): + da, db = req_queue.get() + cm, tm = prog_notify.get() + return da, db, cm, tm, gr.Slider(value=round(cm / tm, 2)) + + +def on_decision(completed_match: int, total_match: int, ev_data: gr.EventData): + if completed_match == total_match: + return gr.Markdown(), gr.Markdown() # no more updates + rsp_queue.put(int(ev_data.target.elem_id[-1]) - 1) # type: ignore + doc_a, doc_b = req_queue.get() + return doc_a, doc_b + + +def progbar_update(completed_match: int, total_match: int): + if completed_match < total_match: + completed_match += 1 + progress = round(completed_match / total_match, 2) + return completed_match, gr.Slider(value=progress) + + +shortcut_js = """ + +""" + + +def ui(conf: Config): + with gr.Blocks( + title="Lone Arena", + head=shortcut_js, + theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_lg), + ) as demo: + completed_match, total_match = gr.State(0), gr.State(1) + progbar = gr.Slider(0, 1, 0, label="Progress", container=False) + gr.Markdown( + """ + ## Which of the two responses is better? + """ + ) + with gr.Row(equal_height=True): + with gr.Column(variant="panel"): + choose1 = gr.Button("👇This one's better [f]", elem_id="choose1") + candidate1 = gr.Markdown(line_breaks=True) + with gr.Column(variant="panel"): + choose2 = gr.Button("👇This one's better [j]", elem_id="choose2") + candidate2 = gr.Markdown(line_breaks=True) + result_table = gr.DataFrame(visible=True, row_count=len(conf.prompt) + 1) + + gr.on( + triggers=[choose1.click, choose2.click], + fn=on_decision, + inputs=[completed_match, total_match], + outputs=[candidate1, candidate2], + ).then( + progbar_update, + inputs=[completed_match, total_match], + outputs=[completed_match, progbar], + ) + demo.load( + init, + outputs=[candidate1, candidate2, completed_match, total_match, progbar], + ) + # workaround for https://github.com/gradio-app/gradio/issues/7101 + demo.load( + lambda: gr.DataFrame(visible=False), + outputs=[result_table], + ) + demo.load(partial(main, conf), outputs=[result_table]) + return demo + + +if __name__ == "__main__": + argp = argparse.ArgumentParser(description="Host the evaluation Web UI") + argp.add_argument("--port", type=int, default=7860) + argp.add_argument("config", type=str) + args = argp.parse_args() + conf = load_config(args.config) + + demo = ui(conf) + demo.launch(server_port=args.port, show_api=False, quiet=True) diff --git a/generate.py b/generate.py new file mode 100644 index 0000000..c5c6787 --- /dev/null +++ b/generate.py @@ -0,0 +1,65 @@ +from lone_arena.config import load_config, Model, Config +from lone_arena.files import DocumentDir + +import openai +from tqdm import tqdm + +import argparse +import re +from typing import Any + +ROLE_TAG = re.compile(r"^(user|assistant|system): ?(.*)$") + + +def parse_chat(chat: str) -> list[dict]: + msg = [] + for li in chat.splitlines(): + if (m := ROLE_TAG.match(li)) is not None: + msg.append({"role": m.group(1), "content": m.group(2)}) + continue + assert len(msg) > 0, f"missing role tag for {chat!r}" + msg[-1]["content"] += "\n" + li + return msg + + +def split_params(params: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: + params = params.copy() + client_params = {} + for k in ["api_key", "organization", "base_url", "timeout", "max_retries"]: + if 
k in params: + client_params[k] = params.pop(k) + return client_params, params + + +def batch_request(model: Model, prompts: dict[str, list], conf: Config): + client_params, completion_params = split_params(model.openai_params) + client = openai.OpenAI(**client_params) + docsd = DocumentDir(conf.data_dir) + pbar = tqdm(total=conf.sample * len(prompts), desc=f"model {model.name}") + for pname, messages in prompts.items(): + msg_list = [] + for _ in range(conf.sample): + content = "" + while not content: + chat_completion = client.chat.completions.create( + messages=messages, + **completion_params, + ) + content = chat_completion.choices[0].message.content + msg_list.append([*messages, {"role": "assistant", "content": content}]) + pbar.update(1) + docsd.dump(msg_list, pname, model.name) + pbar.close() + + +if __name__ == "__main__": + argp = argparse.ArgumentParser( + description="Gather sample responses from model endpoints" + ) + argp.add_argument("config", type=str) + args = argp.parse_args() + + conf = load_config(args.config) + prompts = {p.name: parse_chat(p.chat) for p in conf.prompt} + for model in conf.model: + batch_request(model, prompts, conf) diff --git a/lone_arena/__init__.py b/lone_arena/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lone_arena/chatcup.py b/lone_arena/chatcup.py new file mode 100644 index 0000000..19ba9f7 --- /dev/null +++ b/lone_arena/chatcup.py @@ -0,0 +1,87 @@ +from .tournament import single_elimination, pair_matches, run_tournament, Player, Podium + +from attrs import define +import pandas as pd + +from typing import Callable, Iterable, Protocol, TYPE_CHECKING + + +class Cup(Protocol): + def nmatch(self, nprompt: int | None = None) -> int: + ... + + def run( + self, compete: Callable[[Player, Player], tuple[Player, Player]] + ) -> Iterable[Podium]: + ... + + def tabulate_result( + self, podiums: list[Podium], score_weights: list[float] + ) -> pd.DataFrame: + ... 
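# Illustrative sketch (comments only, not part of the module): a Cup
# implementation such as Top3_1v1 below is driven by evaluate.py roughly as
#
#     cup = Top3_1v1(prompt_names, model_names, conf.sample, conf.top3_scores)
#     for prompt_name, podium in zip(prompt_names, cup.run(compete)):
#         ...                     # persist each podium as it is decided
#     table = cup.tabulate_result(podiums, score_weights)
#
# where `compete(a, b)` returns the pair reordered as (winner, loser); in the
# web UI that decision comes from a human pressing f or j.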
+ + +@define +class Top3_1v1: + prompt_names: list[str] + model_names: list[str] + nplayer: int + top3_scores: tuple[float, float, float] + + def nmatch(self, nprompt: int | None = None) -> int: + nprompt = nprompt or len(self.prompt_names) + return nprompt * (self.nplayer * 2 + 3) + + def run( + self, compete: Callable[[Player, Player], tuple[Player, Player]] + ) -> Iterable[Podium]: + assert len(self.model_names) == 2, "expect 2 models" + for pname in self.prompt_names: + ta = single_elimination( + [(pname, self.model_names[0], i) for i in range(self.nplayer)] + ) + tb = single_elimination( + [(pname, self.model_names[1], i) for i in range(self.nplayer)] + ) + run_tournament(ta, tb, compete=compete) + + top3 = list(zip(ta.podium.players, tb.podium.players)) + tp = pair_matches(top3) + run_tournament(tp, compete=compete) + + yield tp.podium + + def tabulate_result( + self, podiums: list[Podium], score_weights: list[float] + ) -> pd.DataFrame: + ptags: list[list] = [p.players for p in podiums] + tb = [] + scores = [0.0] * len(self.model_names) + total_scores = [0.0] * len(self.model_names) + for ptag, weight in zip(ptags, score_weights): + for i in range(len(self.model_names)): + scores[i] = ( + self.top3_scores[0] * (ptag[0][1] == self.model_names[i]) + + self.top3_scores[1] * (ptag[1][1] == self.model_names[i]) + + self.top3_scores[2] * (ptag[2][1] == self.model_names[i]) + ) + total_scores[i] += scores[i] * weight + tb.append( + { + "Prompt": ptag[0][0], + **{m: round(s, 1) for m, s in zip(self.model_names, scores)}, + "weight": f"{weight:.1f}×", + } + ) + tb.append( + { + "Prompt": "TOTAL", + **{m: round(s, 1) for m, s in zip(self.model_names, total_scores)}, + "weight": "", + } + ) + return pd.DataFrame(tb) + + +if TYPE_CHECKING: + _: type[Cup] = Top3_1v1 diff --git a/lone_arena/config.py b/lone_arena/config.py new file mode 100644 index 0000000..05c4006 --- /dev/null +++ b/lone_arena/config.py @@ -0,0 +1,52 @@ +from attrs import define, Factory +import cattrs + +import tomllib +from pathlib import Path +from math import log2 +from typing import Any + +cattrs.register_structure_hook(Path, lambda d, t: t(d)) + + +@define +class Model: + name: str + # params for openai.OpenAI and openai.OpenAI.chat.completion.create + openai_params: dict[str, Any] = Factory(dict) + + @classmethod + def from_dict(cls, d): + return cls( + name=d.pop("name"), + openai_params=d, + ) + + +cattrs.register_structure_hook(Model, lambda d, t: t.from_dict(d)) + + +@define +class Prompt: + name: str + chat: str + score_weight: float = 1.0 + + +@define +class Config: + data_dir: Path = Path("./data") + sample: int = 8 + top3_scores: tuple[float, float, float] = (4.8, 3.2, 2.0) + model: list[Model] = Factory(list) + prompt: list[Prompt] = Factory(list) + + def __attrs_post_init__(self): + assert log2(self.sample).is_integer(), "config: sample must be power of 2" + assert len(self.prompt) > 0, "config: expect at least 1 prompt" + + +def load_config(fname: str) -> Config: + with open(fname, "rb") as f: + d = tomllib.load(f) + return cattrs.structure(d, Config) diff --git a/lone_arena/files.py b/lone_arena/files.py new file mode 100644 index 0000000..6f60961 --- /dev/null +++ b/lone_arena/files.py @@ -0,0 +1,62 @@ +from .tournament import Podium, Player + +from attrs import define + +from pathlib import Path +from functools import cached_property +import json + +type Messages = list[dict] +type Documents = dict[Player, Messages] + + +@define(slots=False) +class DocumentDir: + data_dir: Path + + @cached_property + def 
doc_dir(self) -> Path: + d = self.data_dir / "response" + d.mkdir(exist_ok=True, parents=True) + return d + + def load(self, prompt_names: list[str], model_names: list[str]) -> Documents: + docs = {} + for pname in prompt_names: + for mname in model_names: + with (self.doc_dir / pname / f"{mname}.jsonl").open("r") as fi: + for i, line in enumerate(fi): + docs[(pname, mname, i)] = json.loads(line) + return docs + + def dump(self, msg_list: list[Messages], prompt_name: str, model_name: str): + pdir = self.doc_dir / prompt_name + pdir.mkdir(exist_ok=True) + with (pdir / f"{model_name}.jsonl").open("w") as fo: + for msg in msg_list: + json.dump(msg, fo, ensure_ascii=False) + print(file=fo) + + +@define(slots=False) +class ResultDir: + data_dir: Path + + @cached_property + def result_dir(self) -> Path: + d = self.data_dir / "result" + d.mkdir(exist_ok=True, parents=True) + return d + + def load(self, prompt_names: list[str]) -> tuple[list[Podium], list[str]]: + podiums = [] + pnames_todo = [] + for pname in prompt_names: + if (self.result_dir / f"{pname}.json").exists(): + podiums.append(Podium.load_from(self.result_dir / f"{pname}.json")) + else: + pnames_todo.append(pname) + return podiums, pnames_todo + + def dump(self, podium: Podium, docs: Documents, prompt_name: str): + podium.dump(self.result_dir / f"{prompt_name}.json", docs) diff --git a/lone_arena/format.py b/lone_arena/format.py new file mode 100644 index 0000000..124d4b7 --- /dev/null +++ b/lone_arena/format.py @@ -0,0 +1,20 @@ +def format_conversation(conv): + if not conv: + return "" + conv_f = [] + for e in conv[:-1]: + conv_f.append( + f"**{' '*(9-len(e['role']))}{e['role']}** {e['content']}" + ) + conv_f.append( + f"**{' '*(9-len(conv[-1]['role']))}{conv[-1]['role']}** {conv[-1]['content']}" + ) + s = "\n\n".join(conv_f) + return min_lines(s, 6) + + +def min_lines(s, n): + ln = len(s.splitlines()) + if ln < n: + s += "
" * (n - ln) + return s diff --git a/lone_arena/test_tournament.py b/lone_arena/test_tournament.py new file mode 100644 index 0000000..64c51b0 --- /dev/null +++ b/lone_arena/test_tournament.py @@ -0,0 +1,38 @@ +import pytest +from .tournament import * + + +@pytest.mark.parametrize("case", ["4", "16"]) +def test_single_elimination(case): + players, n_init, n_to_top = { + "4": (list(range(4)), 2, 1), + "16": (list(range(16)), 8, 3), + }[case] + + t = single_elimination(players) + assert len(t.init_matches) == n_init + m = t.init_matches[0] + for _ in range(n_to_top): + wt = m.winner_to + assert wt is not None and isinstance(wt[0], Match) + m = wt[0] + assert m.winner_to is not None and m.winner_to[0] is t.podium + assert len(t.podium.players) == 3 + + +def test_pair_matches(): + players = [(0, 1), (2, 3), (4, 5), (6, 7)] + t = pair_matches(players) + assert len(t.init_matches) == 4 + assert len(t.podium.players) == 4 + + +def test_run_tournament(): + def compete(p1, p2): + if p1 < p2: + return p2, p1 + return p1, p2 + + t = single_elimination([0, 7, 3, 4, 1, 6, 2, 5]) + run_tournament(t, compete=compete) + assert t.podium.players == [7, 6, 5] diff --git a/lone_arena/tournament.py b/lone_arena/tournament.py new file mode 100644 index 0000000..a9e1e26 --- /dev/null +++ b/lone_arena/tournament.py @@ -0,0 +1,109 @@ +from attrs import define, Factory, NOTHING as TBD + +from math import log2 +from itertools import chain, batched +import random +from pathlib import Path +import json +from typing import Self, Callable, Protocol +from collections.abc import Hashable + +type Player = Hashable + + +class PlayerBox(Protocol): + players: list[Player] + + +@define +class Match: + winner_to: tuple[PlayerBox, int] | None = None + loser_to: tuple[PlayerBox, int] | None = None + players: list[Player] = Factory(lambda: [TBD, TBD]) + + def __repr__(self) -> str: + return f"Match({self.players})" + + def moveon(self, winner: Player, loser: Player) -> list["Match"]: + next_matches = [self._fill(winner, True), self._fill(loser, False)] + return [m for m in next_matches if isinstance(m, Match)] + + def _fill(self, v: Player, is_winning: bool) -> PlayerBox | None: + next_pos = self.winner_to if is_winning else self.loser_to + if next_pos is None: + return None + m, i = next_pos + m.players[i] = v + if any((x is TBD for x in m.players)): + return None + return m + + +@define +class Podium: + players: list[Player] + + @classmethod + def for_(cls, n: int) -> Self: + return cls(players=[TBD] * n) + + def dump(self, path: Path, docs: dict[Player, list] | None = None): + if docs is None: + d = [{"id": p} for p in self.players] + else: + d = [{"id": p, "chat": docs[p]} for p in self.players] + json.dump(d, path.open("w"), ensure_ascii=False, indent=2) + + @classmethod + def load_from(cls, path: Path) -> Self: + d = json.load(path.open("r")) + return cls(players=[tuple(x["id"]) for x in d]) + + +@define +class Tournament: + init_matches: list[Match] + podium: Podium + + +def run_tournament( + *tournaments: Tournament, + compete: Callable[[Player, Player], tuple[Player, Player]], +): + pool = list(chain.from_iterable(t.init_matches for t in tournaments)) + while pool: + m = pool.pop(random.randrange(len(pool))) + random.shuffle(m.players) + winner, loser = compete(*m.players) + pool.extend(m.moveon(winner, loser)) + + +def single_elimination(players: list[Player]) -> Tournament: + nplayer = len(players) + assert log2(nplayer).is_integer(), "expect 2^n players" + + top3 = Podium.for_(3) + final = Match((top3, 0), (top3, 1)) + 
loser_final = Match((top3, 2), None) + semifinal0 = Match((final, 0), (loser_final, 0)) + semifinal1 = Match((final, 1), (loser_final, 1)) + leaves = [semifinal0, semifinal1] + while len(leaves) * 2 < nplayer: + leaves = list( + chain.from_iterable( + [Match((m, 0), None), Match((m, 1), None)] for m in leaves + ) + ) + + for i, p in enumerate(batched(players, 2)): + leaves[i].players = list(p) + + return Tournament(leaves, top3) + + +def pair_matches(players: list[tuple[Player, Player]]) -> Tournament: + assert all(len(p) == 2 for p in players), "expect n pairs" + n = len(players) + winners = Podium.for_(n) + matches = [Match((winners, i), None, list(p)) for i, p in enumerate(players)] + return Tournament(matches, winners) diff --git a/media/lone_arena-sketch-small.png b/media/lone_arena-sketch-small.png new file mode 100644 index 0000000..7c3b7b6 Binary files /dev/null and b/media/lone_arena-sketch-small.png differ diff --git a/media/lone_arena-ui-en.png b/media/lone_arena-ui-en.png new file mode 100644 index 0000000..381907e Binary files /dev/null and b/media/lone_arena-ui-en.png differ diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..c8a1f70 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +pytest +ruff +pre-commit diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4bb1856 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +gradio +openai +attrs +cattrs +pandas +tqdm
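For orientation, here is a minimal sketch (not part of the diff) of driving `lone_arena.tournament` directly under Python ≥ 3.12, mirroring `test_run_tournament`; the `compete` callback is a stand-in for the human judgement that `evaluate.py` collects through the web UI:

```python
from lone_arena.tournament import single_elimination, run_tournament

def compete(p1, p2):
    # Stand-in judge: always prefer the numerically larger player.
    return (p1, p2) if p1 > p2 else (p2, p1)

t = single_elimination([0, 7, 3, 4, 1, 6, 2, 5])  # player count must be a power of 2
run_tournament(t, compete=compete)
print(t.podium.players)  # [7, 6, 5]: the final's winner, the final's loser, the third-place winner
```

The final's winner takes podium slot 0, the final's loser slot 1, and the winner of the third-place match between the semifinal losers slot 2.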
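The role-tagged `chat` strings in `config.toml` are turned into OpenAI-style message lists by `parse_chat` in `generate.py`; any line without a `user:`/`assistant:`/`system:` prefix is folded into the previous message. A small sketch, assuming it is run from the repository root with the requirements installed (the example chat text is made up):

```python
from generate import parse_chat

chat = """\
user: What should I do if I can't fall asleep?
assistant: Hmmm, maybe you can try telling me a story.
user: lol, what kind of story do you like?
A long one, please."""

messages = parse_chat(chat)
assert [m["role"] for m in messages] == ["user", "assistant", "user"]
assert messages[-1]["content"].endswith("\nA long one, please.")
```

`generate.py` then writes each prompt's samples to `data/response/<prompt>/<model>.jsonl`, and `evaluate.py` saves each decided podium, together with the winning conversations, to `data/result/<prompt>.json`.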
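Likewise, `split_params` in `generate.py` decides which keys of a `[[model]]` entry go to the `openai.OpenAI` client constructor and which are forwarded to `chat.completions.create`. A sketch under the same assumptions; `temperature` here is just a hypothetical extra sampling parameter:

```python
from generate import split_params

model_params = {
    "base_url": "http://localhost:8000/v1",
    "api_key": "dummy-key",
    "max_retries": 1,
    "model": "",
    "temperature": 0.9,   # hypothetical extra key, passed through to the completion call
}
client_params, completion_params = split_params(model_params)
assert sorted(client_params) == ["api_key", "base_url", "max_retries"]
assert sorted(completion_params) == ["model", "temperature"]
```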