Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add bloom filter related proto fields #710

Merged
merged 2 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions google/cloud/firestore_v1/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
from .aggregation_result import (
AggregationResult,
)
from .bloom_filter import (
BitSequence,
BloomFilter,
)
from .common import (
DocumentMask,
Precondition,
Expand Down Expand Up @@ -75,6 +79,8 @@

__all__ = (
"AggregationResult",
"BitSequence",
"BloomFilter",
"DocumentMask",
"Precondition",
"TransactionOptions",
Expand Down
110 changes: 110 additions & 0 deletions google/cloud/firestore_v1/types/bloom_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

from typing import MutableMapping, MutableSequence

import proto # type: ignore


# Module descriptor: names the proto package these messages belong to and
# lists the message classes defined in this file.
__protobuf__ = proto.module(
    package="google.firestore.v1",
    manifest={
        "BitSequence",
        "BloomFilter",
    },
)


class BitSequence(proto.Message):
    r"""A bit sequence packed into a byte array.

    Every byte of ``bitmap`` carries 8 bits of the sequence, except
    possibly the final byte, which may carry 8 *or fewer* bits.
    ``padding`` gives the number of bits of that final byte that are
    "padding"; the values of those bits are unspecified and must be
    ignored.

    Within each byte, bits are addressed starting from the least
    significant bit:

    - bit 0 is ``(bitmap[0] & 0x01) != 0``
    - bit 1 is ``(bitmap[0] & 0x02) != 0``
    - bit 2 is ``(bitmap[0] & 0x04) != 0``
    - bit 3 is ``(bitmap[0] & 0x08) != 0``
    - bit n is ``(bitmap[n // 8] & (0x01 << (n % 8))) != 0``
      (the byte index is an integer division)

    The "size" of a ``BitSequence`` (the number of bits it contains)
    is ``(len(bitmap) * 8) - padding``.

    Attributes:
        bitmap (bytes):
            The bytes that encode the bit sequence.
            May have a length of zero.
        padding (int):
            How many bits of the last byte in ``bitmap`` to ignore as
            "padding". Must be ``0`` when ``bitmap`` is empty;
            otherwise must be between 0 and 7, inclusive.
    """

    bitmap: bytes = proto.Field(proto.BYTES, number=1)
    padding: int = proto.Field(proto.INT32, number=2)


class BloomFilter(proto.Message):
    r"""A bloom filter (https://en.wikipedia.org/wiki/Bloom_filter).

    Each entry is hashed with MD5, and the resulting 128-bit hash is
    treated as 2 distinct 64-bit hash values, named ``h1`` and ``h2``,
    each interpreted as an unsigned integer using 2's complement
    encoding.

    The ``hash_count`` hash values are then derived from ``h1`` and
    ``h2`` with the following formula, starting at ``i=0``:

    ::

        h(i) = h1 + (i * h2)

    Each resulting value is taken modulo the number of bits in the
    bloom filter, which yields the bits of the bloom filter to test
    for the given entry.

    Attributes:
        bits (google.cloud.firestore_v1.types.BitSequence):
            The bloom filter data.
        hash_count (int):
            The number of hashes used by the algorithm.
    """

    bits: "BitSequence" = proto.Field(
        proto.MESSAGE, number=1, message="BitSequence"
    )
    hash_count: int = proto.Field(proto.INT32, number=2)


# Export exactly the names listed in the module manifest, in sorted order.
__all__ = tuple(sorted(__protobuf__.manifest))
15 changes: 15 additions & 0 deletions google/cloud/firestore_v1/types/firestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from google.cloud.firestore_v1.types import query as gf_query
from google.cloud.firestore_v1.types import write
from google.protobuf import timestamp_pb2 # type: ignore
from google.protobuf import wrappers_pb2 # type: ignore
from google.rpc import status_pb2 # type: ignore


Expand Down Expand Up @@ -1338,6 +1339,15 @@ class Target(proto.Message):
once (bool):
If the target should be removed once it is
current and consistent.
expected_count (google.protobuf.wrappers_pb2.Int32Value):
The number of documents that last matched the query at the
resume token or read time.

This value is only relevant when a ``resume_type`` is
provided. This value being present and greater than zero
signals that the client wants
``ExistenceFilter.unchanged_names`` to be included in the
response.
"""

class DocumentsTarget(proto.Message):
Expand Down Expand Up @@ -1419,6 +1429,11 @@ class QueryTarget(proto.Message):
proto.BOOL,
number=6,
)
expected_count: wrappers_pb2.Int32Value = proto.Field(
proto.MESSAGE,
number=12,
message=wrappers_pb2.Int32Value,
)


class TargetChange(proto.Message):
Expand Down
24 changes: 24 additions & 0 deletions google/cloud/firestore_v1/types/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import proto # type: ignore

from google.cloud.firestore_v1.types import bloom_filter
from google.cloud.firestore_v1.types import common
from google.cloud.firestore_v1.types import document as gf_document
from google.protobuf import timestamp_pb2 # type: ignore
Expand Down Expand Up @@ -471,6 +472,24 @@ class ExistenceFilter(proto.Message):
If different from the count of documents in the client that
match, the client must manually determine which documents no
longer match the target.
unchanged_names (google.cloud.firestore_v1.types.BloomFilter):
A bloom filter that contains the UTF-8 byte encodings of the
resource names of the documents that match
[target_id][google.firestore.v1.ExistenceFilter.target_id],
in the form
``projects/{project_id}/databases/{database_id}/documents/{document_path}``
that have NOT changed since the query results indicated by
the resume token or timestamp given in
``Target.resume_type``.

This bloom filter may be omitted at the server's discretion,
such as if it is deemed that the client will not make use of
it or if it is too computationally expensive to calculate or
transmit. Clients must gracefully handle this field being
absent by falling back to the logic used before this field
existed; that is, re-add the target without a resume token
to figure out which documents in the client's cache are out
of sync.
"""

target_id: int = proto.Field(
Expand All @@ -481,6 +500,11 @@ class ExistenceFilter(proto.Message):
proto.INT32,
number=2,
)
unchanged_names: bloom_filter.BloomFilter = proto.Field(
proto.MESSAGE,
number=3,
message=bloom_filter.BloomFilter,
)


__all__ = tuple(sorted(__protobuf__.manifest))