Skip to content

Commit 5c43efb

Browse files
authored
feat: Add bigframes.bigquery.rand() function (#2501)
This change adds `bigframes.bigquery.rand()` which wraps the BigQuery `RAND()` function. It includes a warning about non-determinism in the docstring. Additionally, `SqlScalarOp` has been updated to accept an `is_deterministic` argument, which is set to `False` for `rand()`. Fixes customer request for this function. 🦕 Co-authored-by: tswast <247555+tswast@users.noreply.github.com>
1 parent ce686c1 commit 5c43efb

File tree

5 files changed

+133
-0
lines changed

5 files changed

+133
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
to_json,
5959
to_json_string,
6060
)
61+
from bigframes.bigquery._operations.mathematical import rand
6162
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6263
from bigframes.bigquery._operations.sql import sql_scalar
6364
from bigframes.bigquery._operations.struct import struct
@@ -99,6 +100,8 @@
99100
parse_json,
100101
to_json,
101102
to_json_string,
103+
# mathematical ops
104+
rand,
102105
# search ops
103106
create_vector_index,
104107
vector_search,
@@ -154,6 +157,8 @@
154157
"parse_json",
155158
"to_json",
156159
"to_json_string",
160+
# mathematical ops
161+
"rand",
157162
# search ops
158163
"create_vector_index",
159164
"vector_search",
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from bigframes import dtypes
18+
from bigframes import operations as ops
19+
import bigframes.core.col
20+
import bigframes.core.expression
21+
22+
23+
def rand() -> bigframes.core.col.Expression:
    """
    Return an expression producing a pseudo-random FLOAT64 value in [0, 1).

    The lower bound 0 is inclusive and the upper bound 1 is exclusive. The
    expression wraps the BigQuery ``RAND()`` function.

    .. warning::
        The resulting expression is non-deterministic: reading the same
        column twice may give different values. Do not use this value, or
        anything derived from it, as a join key.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> df = bpd.DataFrame({"a": [1, 2, 3]})
        >>> df['random'] = bbq.rand()
        >>> # Resulting column 'random' will contain random floats between 0 and 1.

    Returns:
        bigframes.pandas.api.typing.Expression:
            An expression that can be used in
            :func:`~bigframes.pandas.DataFrame.assign` and other methods. See
            :func:`bigframes.pandas.col`.
    """
    # Flag the op as non-deterministic so it is not treated as a pure scalar.
    rand_op = ops.SqlScalarOp(
        sql_template="RAND()",
        _output_type=dtypes.FLOAT_DTYPE,
        is_deterministic=False,
    )
    # RAND() takes no arguments, hence the empty inputs tuple.
    node = bigframes.core.expression.OpExpression(rand_op, ())
    return bigframes.core.col.Expression(node)

bigframes/operations/generic_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,15 @@ class SqlScalarOp(base_ops.NaryOp):
443443
name: typing.ClassVar[str] = "sql_scalar"
444444
_output_type: dtypes.ExpressionType
445445
sql_template: str
446+
is_deterministic: bool = True
446447

447448
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
    """Return the output type stored on this op; ``input_types`` are ignored."""
    return self._output_type
449450

451+
@property
def deterministic(self) -> bool:
    """Whether this op is deterministic; mirrors the ``is_deterministic`` field."""
    return self.is_deterministic
454+
450455

451456
@dataclasses.dataclass(frozen=True)
452457
class PyUdfOp(base_ops.NaryOp):
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
17+
18+
def test_rand(scalars_df_index):
    """Check that ``bbq.rand()`` yields one value in [0, 1) per row."""
    # Attach the random column to the fixture frame.
    with_random = scalars_df_index.assign(random=bbq.rand())

    # Materialize the column locally so the checks run on pandas data.
    values = with_random["random"].to_pandas()

    # One value per row.
    assert len(values) == len(with_random)

    # Every value lies in the half-open interval [0, 1).
    assert (values >= 0).all()
    assert (values < 1).all()

    # With more than one row, all-identical values would be an unlikely
    # collision for a random source.
    if len(values) > 1:
        assert values.nunique() > 1
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
import bigframes.core.col as col
17+
import bigframes.core.expression as ex
18+
import bigframes.dtypes as dtypes
19+
import bigframes.operations as ops
20+
21+
22+
def test_rand_returns_expression():
    """Check the structure of the expression built by ``bbq.rand()``."""
    result = bbq.rand()
    assert isinstance(result, col.Expression)

    # The wrapped node must be an op expression around a SQL scalar op.
    tree = result._value
    assert isinstance(tree, ex.OpExpression)

    sql_op = tree.op
    assert isinstance(sql_op, ops.SqlScalarOp)
    assert sql_op.sql_template == "RAND()"
    assert sql_op._output_type == dtypes.FLOAT_DTYPE
    assert not sql_op.is_deterministic

    # RAND() takes no arguments.
    assert len(tree.inputs) == 0

0 commit comments

Comments
 (0)