S3: Add tests for PyArrow with native S3 filesystem (#7508)

* PyArrow native S3 filesystem

* add sse-s3 tests

* update

* minor

* ENABLE_SSE_S3

* Update test_pyarrow_native_s3.py

* clean up

* refactoring

* Update test_pyarrow_native_s3.py
This commit is contained in:
Chris Lu
2025-11-19 13:49:22 -08:00
committed by GitHub
parent ca84a8a713
commit 8be9e258fc
7 changed files with 1008 additions and 5 deletions

View File

@@ -0,0 +1,41 @@
"""
Shared utility functions for PyArrow Parquet tests.
This module provides common test utilities used across multiple test scripts
to avoid code duplication and ensure consistency.
"""
import pyarrow as pa
def create_sample_table(num_rows: int = 5) -> pa.Table:
"""Create a sample PyArrow table for testing.
Args:
num_rows: Number of rows to generate (default: 5)
Returns:
PyArrow Table with test data containing:
- id: int64 sequential IDs (0 to num_rows-1)
- name: string user names (user_0, user_1, ...)
- value: float64 values (id * 1.5)
- flag: bool alternating True/False based on even/odd id
Example:
>>> table = create_sample_table(3)
>>> print(table)
pyarrow.Table
id: int64
name: string
value: double
flag: bool
"""
return pa.table(
{
"id": pa.array(range(num_rows), type=pa.int64()),
"name": pa.array([f"user_{i}" for i in range(num_rows)], type=pa.string()),
"value": pa.array([float(i) * 1.5 for i in range(num_rows)], type=pa.float64()),
"flag": pa.array([i % 2 == 0 for i in range(num_rows)], type=pa.bool_()),
}
)