Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Deprecate use of FixedAlphanum #254

Merged
merged 1 commit into from
Oct 14, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions data_diff/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
Native_UUID,
String_UUID,
String_Alphanum,
String_FixedAlphanum,
String_VaryingAlphanum,
TemporalType,
UnknownColType,
Expand Down Expand Up @@ -133,7 +132,7 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
for row in explain:
# Most returned a 1-tuple. Presto returns a string
if isinstance(row, tuple):
row ,= row
(row,) = row
logger.debug("EXPLAIN: %s", row)
answer = input("Continue? [y/n] ")
if not answer.lower() in ["y", "yes"]:
Expand Down Expand Up @@ -240,7 +239,7 @@ def _process_table_schema(
# Return a dict of form {name: type} after normalization
return col_dict

def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], where: str = None):
def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], where: str = None, sample_size=32):
"""Refine the types in the column dict, by querying the database for a sample of their values

'where' restricts the rows to be sampled.
Expand All @@ -251,7 +250,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
return

fields = [self.normalize_uuid(c, String_UUID()) for c in text_columns]
samples_by_row = self.query(table(*table_path).select(*fields).where(where or SKIP).limit(16), list)
samples_by_row = self.query(table(*table_path).select(*fields).where(where or SKIP).limit(sample_size), list)
if not samples_by_row:
raise ValueError(f"Table {table_path} is empty.")

Expand Down Expand Up @@ -279,13 +278,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
)
else:
assert col_name in col_dict
lens = set(map(len, alphanum_samples))
if len(lens) > 1:
col_dict[col_name] = String_VaryingAlphanum()
else:
(length,) = lens
col_dict[col_name] = String_FixedAlphanum(length=length)
continue
col_dict[col_name] = String_VaryingAlphanum()

# @lru_cache()
# def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:
Expand Down