Skip to content

Commit

Permalink
Merge commit '61ba3530794fbe2b5739a04c26dc35e704ce69c9' into fix/SIN-…
Browse files Browse the repository at this point in the history
…355-query

* commit '61ba3530794fbe2b5739a04c26dc35e704ce69c9':
  fix(dataset): slug format validation on load (#1609)
  fix(views): transformation using raw sql (#1608)
  Release v3.0.0-beta.10
  • Loading branch information
scaliseraoul committed Feb 13, 2025
2 parents 911cf35 + 61ba353 commit 72b0584
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 13 deletions.
7 changes: 4 additions & 3 deletions pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ def load(dataset_path: str) -> DataFrame:
Returns:
DataFrame: A new PandaAI DataFrame instance with loaded data.
"""
path_parts = dataset_path.split("/")
if len(path_parts) != 2:
raise ValueError("The path must be in the format 'organization/dataset'.")

# Validate the dataset path
get_validated_dataset_path(dataset_path)

dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path)

Expand Down Expand Up @@ -282,6 +282,7 @@ def load(dataset_path: str) -> DataFrame:
if local_dataset_exists
else "Dataset fetched successfully from the remote server."
)
# Printed to display info to the user
print(message)

return df
Expand Down
11 changes: 9 additions & 2 deletions pandasai/helpers/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ def find_closest(filename):
return os.path.join(find_project_root(filename), filename)


def validate_name_format(value):
    """
    Check that a name is a lowercase, hyphen-separated slug such as 'my-org'.

    A valid name is made of one or more groups of lowercase ASCII letters
    and digits, joined by single hyphens. Leading hyphens, trailing hyphens,
    consecutive hyphens, uppercase letters, spaces and the empty string are
    all rejected.

    Args:
        value (str): The organization or dataset name to check.

    Returns:
        bool: True if ``value`` is a well-formed slug, False otherwise.
    """
    # Use fullmatch rather than match with a "$" anchor: "$" also matches
    # just before a trailing newline, which would let a name that ends in a
    # newline character pass validation.
    return re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", value) is not None


def get_validated_dataset_path(path: str):
# Validate path format
path_parts = path.split("/")
Expand All @@ -54,12 +61,12 @@ def get_validated_dataset_path(path: str):
raise ValueError("Both organization and dataset names are required")

# Validate organization and dataset name format
if not bool(re.match(r"^[a-z0-9\-]+$", org_name)):
if not validate_name_format(org_name):
raise ValueError(
"Organization name must be lowercase and use hyphens instead of spaces (e.g. 'my-org')"
)

if not bool(re.match(r"^[a-z0-9\-]+$", dataset_name)):
if not validate_name_format(dataset_name):
raise ValueError(
"Dataset name must be lowercase and use hyphens instead of spaces (e.g. 'my-dataset')"
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasai"
version = "3.0.0-beta.9"
version = "3.0.0-beta.10"
description = "Chat with your database (SQL, CSV, pandas, mongodb, noSQL, etc). PandaAI makes data analysis conversational using LLMs (GPT 3.5 / 4, Anthropic, VertexAI) and RAG."
authors = ["Gabriele Venturi"]
license = "MIT"
Expand Down
18 changes: 18 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,24 @@ def test_get_validated_dataset_path_invalid_dataset():
get_validated_dataset_path("my-org/INVALID_DATASET")


def test_get_validated_dataset_path_start_with_hyphen():
    """Test get_validated_dataset_path rejects a dataset name starting with a hyphen"""
    # The name is deliberately all lowercase so that the leading hyphen is
    # the only thing wrong with it; an uppercase name would be rejected
    # regardless of where the hyphens are.
    with pytest.raises(
        ValueError,
        match="Dataset name must be lowercase and use hyphens instead of spaces",
    ):
        get_validated_dataset_path("my-org/-invalid-dataset")


def test_get_validated_dataset_path_end_with_hyphen():
    """Test get_validated_dataset_path rejects a dataset name ending with a hyphen"""
    # The name is deliberately all lowercase and has no leading hyphen so
    # that the trailing hyphen is the only thing wrong with it.
    with pytest.raises(
        ValueError,
        match="Dataset name must be lowercase and use hyphens instead of spaces",
    ):
        get_validated_dataset_path("my-org/invalid-dataset-")


@pytest.fixture
def mock_dataset_loader():
with patch("pandasai.cli.main.DatasetLoader") as mock:
Expand Down
14 changes: 7 additions & 7 deletions tests/unit_tests/test_pandasai_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_load_valid_dataset(
mock_find_project_root.return_value = os.path.join("mock", "root")
mock_exists.return_value = True

dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"
result = pandasai.load(dataset_path)

# Verify the class method was called
Expand All @@ -138,7 +138,7 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file)
pandasai.get_pandaai_session.return_value = MagicMock()
mock_request_session.get.return_value.status_code = 404

dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"

with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)
Expand All @@ -154,11 +154,11 @@ def test_load_missing_not_found_locally_and_no_remote_key(
mock_response = MagicMock()
mock_response.status_code = 404
mock_session.return_value.get.return_value = mock_response
dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"

with pytest.raises(
PandaAIApiKeyError,
match='The dataset "org/dataset_name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.',
match='The dataset "org/dataset-name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.',
):
pandasai.load(dataset_path)

Expand All @@ -167,7 +167,7 @@ def test_load_missing_not_found_locally_and_no_remote_key(
def test_load_missing_api_url(self, mock_exists):
"""Test loading when API URL is missing."""
mock_exists.return_value = False
dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"

with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)
Expand All @@ -181,7 +181,7 @@ def test_load_missing_not_found(self, mock_session, mock_exists):
mock_response = MagicMock()
mock_response.status_code = 404
mock_session.return_value.get.return_value = mock_response
dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"

with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)
Expand All @@ -208,7 +208,7 @@ def test_load_successful_zip_extraction(
mock_request_session.get.return_value.status_code = 200
mock_request_session.get.return_value.content = b"mock zip content"

dataset_path = "org/dataset_name"
dataset_path = "org/dataset-name"

# Mock the zip file extraction
mock_zip_file.return_value.__enter__.return_value.extractall = MagicMock()
Expand Down

0 comments on commit 72b0584

Please sign in to comment.