Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields):
"""
partial = {}
for filter_field in filter_fields:
api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field)
api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field)
if api_field is None and filter_field not in obj._properties:
raise ValueError("No property %s" % filter_field)
elif api_field is not None:
partial[api_field] = obj._properties.get(api_field)
_set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field))
else:
# allows properties that are not defined in the library
# and properties that have the same name as API resource key
Expand Down
57 changes: 32 additions & 25 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
"""Schemas for BigQuery tables / queries."""

from __future__ import annotations
import collections
import enum
import typing
from typing import Any, cast, Dict, Iterable, Optional, Union
from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
Expand Down Expand Up @@ -489,6 +488,8 @@ def _parse_schema_resource(info):
Optional[Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]]:
A list of parsed fields, or ``None`` if no "fields" key found.
"""
if isinstance(info, list):
return [SchemaField.from_api_repr(f) for f in info]
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]


Expand All @@ -501,40 +502,46 @@ def _build_schema_resource(fields):
Returns:
Sequence[Dict]: Mappings describing the schema of the supplied fields.
"""
return [field.to_api_repr() for field in fields]
if isinstance(fields, Sequence):
# Input is a Sequence (e.g. a list): serialize each field via
# to_api_repr() and return a list of API-representation mappings.
return [field.to_api_repr() for field in fields]

else:
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")


def _to_schema_fields(schema):
"""Coerce `schema` to a list of schema field instances.
"""Coerces schema to a list of SchemaField instances while
preserving the original structure as much as possible.

Args:
schema(Sequence[Union[ \
:class:`~google.cloud.bigquery.schema.SchemaField`, \
Mapping[str, Any] \
]]):
Table schema to convert. If some items are passed as mappings,
their content must be compatible with
:meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
schema (Sequence[Union[ \
    :class:`~google.cloud.bigquery.schema.SchemaField`, \
    Mapping[str, Any] \
]]):
    Table schema to convert. Can be a list of SchemaField
    objects or mappings.

Returns:
Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
A list of SchemaField objects.

Raises:
Exception: If ``schema`` is not a sequence, or if any item in the
sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.
TypeError: If schema is not a Sequence.
"""
for field in schema:
if not isinstance(field, (SchemaField, collections.abc.Mapping)):
raise ValueError(
"Schema items must either be fields or compatible "
"mapping representations."
)

return [
field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
for field in schema
]
if isinstance(schema, Sequence):
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
return [
field
if isinstance(field, SchemaField)
else SchemaField.from_api_repr(field)
for field in schema
]

else:
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")


class PolicyTagList(object):
Expand Down
75 changes: 69 additions & 6 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
import functools
import operator
import typing
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence

import warnings

try:
Expand Down Expand Up @@ -66,6 +67,7 @@
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.enums import DefaultPandasDTypes
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import schema as _schema
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
Expand Down Expand Up @@ -398,7 +400,7 @@ class Table(_TableBase):
"partitioning_type": "timePartitioning",
"range_partitioning": "rangePartitioning",
"time_partitioning": "timePartitioning",
"schema": "schema",
"schema": ["schema", "fields"],
"snapshot_definition": "snapshotDefinition",
"clone_definition": "cloneDefinition",
"streaming_buffer": "streamingBuffer",
Expand All @@ -411,6 +413,7 @@ class Table(_TableBase):
"max_staleness": "maxStaleness",
"resource_tags": "resourceTags",
"external_catalog_table_options": "externalCatalogTableOptions",
"foreign_type_info": ["schema", "foreignTypeInfo"],
}

def __init__(self, table_ref, schema=None) -> None:
Expand Down Expand Up @@ -451,8 +454,20 @@ def schema(self):
If ``schema`` is not a sequence, or if any item in the sequence
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.

.. Note::
If you are referencing a schema for an external catalog table such
as a Hive table, it will also be necessary to populate the foreign_type_info
attribute. This is not necessary if defining the schema for a BigQuery table.

For details, see:
https://cloud.google.com/bigquery/docs/external-tables
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'll need some logic in the setter for schema to avoid overwriting the schema property entirely. Instead, it'll need to be responsible for just schema.fields.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I'm not sure if this format will render well in the docs. We might just move all the contents under NOTE: to after Table's schema.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not yet addressed Tim's comment here.
I spoke with Linchin about the note and with the revision I added, Sphinx should be able to handle the note with no problem.

Copy link
Collaborator Author

@chalmerlowe chalmerlowe Feb 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tswast

fields and schema are two separate items:

  • two different attributes (with setters and getters)
  • two separate nodes on the .properties dict

It is possible for a user to supply an api_resource (a dict) that will overwrite both at the same time, such as: `._properties["schema"] = {"fields": [], "foreign_type_info": {"type_system": "hello world"}}`. At that point, the end user should expect both to be overwritten.

Due to the nested separation of fields and schema in the ._properties dict and how we write content to it (either with setters OR directly into ._properties), I am not aware of any means whereby setting one or the other will cause its opposite to be accidentally overwritten.

I don't think any additional checks are required.

Also, in test_table.py::test_to_api_repr_w_schema_and_foreign_type_info test, it is broken into several steps. Two of those steps are specifically focused on ensuring that if either item is set the other does not change.

"""
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
prop = _helpers._get_sub_prop(
self._properties, self._PROPERTY_TO_API_FIELD["schema"]
)
if not prop:
return []
else:
Expand All @@ -463,10 +478,21 @@ def schema(self, value):
api_field = self._PROPERTY_TO_API_FIELD["schema"]

if value is None:
self._properties[api_field] = None
else:
_helpers._set_sub_prop(
self._properties,
api_field,
None,
)
elif isinstance(value, Sequence):
value = _to_schema_fields(value)
self._properties[api_field] = {"fields": _build_schema_resource(value)}
value = _build_schema_resource(value)
_helpers._set_sub_prop(
self._properties,
api_field,
value,
)
else:
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")

@property
def labels(self):
Expand Down Expand Up @@ -1075,6 +1101,43 @@ def external_catalog_table_options(
self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
] = value

@property
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
"""Optional. Specifies metadata of the foreign data type definition in
field schema (TableFieldSchema.foreign_type_definition).

Returns:
Optional[schema.ForeignTypeInfo]:
Foreign type information, or :data:`None` if not set.

.. Note::
foreign_type_info is only required if you are referencing an
external catalog such as a Hive table.
For details, see:
https://cloud.google.com/bigquery/docs/external-tables
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
"""

prop = _helpers._get_sub_prop(
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"]
)
if prop is not None:
return _schema.ForeignTypeInfo.from_api_repr(prop)
return None

@foreign_type_info.setter
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
value = _helpers._isinstance_or_raise(
value,
(_schema.ForeignTypeInfo, dict),
none_allowed=True,
)
if isinstance(value, _schema.ForeignTypeInfo):
value = value.to_api_repr()
_helpers._set_sub_prop(
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value
)

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/job/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self):

config = LoadJobConfig()
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
with self.assertRaises(ValueError):
with self.assertRaises(TypeError):
config.schema = [full_name, object()]

def test_schema_setter(self):
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2051,7 +2051,7 @@ def test_update_dataset(self):
ds.labels = LABELS
ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")]
ds.resource_tags = RESOURCE_TAGS
fields = [
filter_fields = [
"description",
"friendly_name",
"location",
Expand All @@ -2065,12 +2065,12 @@ def test_update_dataset(self):
) as final_attributes:
ds2 = client.update_dataset(
ds,
fields=fields,
fields=filter_fields,
timeout=7.5,
)

final_attributes.assert_called_once_with(
{"path": "/%s" % PATH, "fields": fields}, client, None
{"path": "/%s" % PATH, "fields": filter_fields}, client, None
)

conn.api_request.assert_called_once_with(
Expand Down Expand Up @@ -2615,7 +2615,7 @@ def test_update_table_w_schema_None(self):
self.assertEqual(len(conn.api_request.call_args_list), 2)
req = conn.api_request.call_args_list[1]
self.assertEqual(req[1]["method"], "PATCH")
sent = {"schema": None}
sent = {"schema": {"fields": None}}
self.assertEqual(req[1]["data"], sent)
self.assertEqual(req[1]["path"], "/%s" % path)
self.assertEqual(len(updated_table.schema), 0)
Expand Down
Loading