Merge commit '61ba3530794fbe2b5739a04c26dc35e704ce69c9' into fix/SIN-355-query

* commit '61ba3530794fbe2b5739a04c26dc35e704ce69c9':
  fix(dataset): slug format validation on load (#1609)
  fix(views): transformation using raw sql (#1608)
  Release v3.0.0-beta.10
sinaptik-ai · Feb 13, 2025 · 72b0584 · 72b0584
2 parents 911cf35 + 61ba353
commit 72b0584
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 13 deletions.
diff --git a/pandasai/__init__.py b/pandasai/__init__.py
@@ -244,9 +244,9 @@ def load(dataset_path: str) -> DataFrame:
     Returns:
         DataFrame: A new PandaAI DataFrame instance with loaded data.
     """
-    path_parts = dataset_path.split("/")
-    if len(path_parts) != 2:
-        raise ValueError("The path must be in the format 'organization/dataset'.")
+
+    # Validate the dataset path
+    get_validated_dataset_path(dataset_path)
 
     dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path)
 
@@ -282,6 +282,7 @@ def load(dataset_path: str) -> DataFrame:
         if local_dataset_exists
         else "Dataset fetched successfully from the remote server."
     )
+    # Printed to display info to the user
     print(message)
 
     return df

diff --git a/pandasai/helpers/path.py b/pandasai/helpers/path.py
@@ -42,6 +42,13 @@ def find_closest(filename):
     return os.path.join(find_project_root(filename), filename)
 
 
+def validate_name_format(value):
+    """
+    Return True only if the entire value is lowercase alphanumeric segments joined by single hyphens (e.g. 'my-org').
+    """
+    return bool(re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", value))  # fullmatch: '$' would also accept a trailing newline
+
+
 def get_validated_dataset_path(path: str):
     # Validate path format
     path_parts = path.split("/")
@@ -54,12 +61,12 @@ def get_validated_dataset_path(path: str):
         raise ValueError("Both organization and dataset names are required")
 
     # Validate organization and dataset name format
-    if not bool(re.match(r"^[a-z0-9\-]+$", org_name)):
+    if not validate_name_format(org_name):
         raise ValueError(
             "Organization name must be lowercase and use hyphens instead of spaces (e.g. 'my-org')"
         )
 
-    if not bool(re.match(r"^[a-z0-9\-]+$", dataset_name)):
+    if not validate_name_format(dataset_name):
         raise ValueError(
             "Dataset name must be lowercase and use hyphens instead of spaces (e.g. 'my-dataset')"
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pandasai"
-version = "3.0.0-beta.9"
+version = "3.0.0-beta.10"
 description = "Chat with your database (SQL, CSV, pandas, mongodb, noSQL, etc). PandaAI makes data analysis conversational using LLMs (GPT 3.5 / 4, Anthropic, VertexAI) and RAG."
 authors = ["Gabriele Venturi"]
 license = "MIT"

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -107,6 +107,24 @@ def test_get_validated_dataset_path_invalid_dataset():
         get_validated_dataset_path("my-org/INVALID_DATASET")
 
 
+def test_get_validated_dataset_path_start_with_hyphen():
+    """Test get_validated_dataset_path rejects a dataset name that starts with a hyphen"""
+    with pytest.raises(
+        ValueError,
+        match="Dataset name must be lowercase and use hyphens instead of spaces",
+    ):
+        get_validated_dataset_path("my-org/-invalid-dataset")  # lowercase, so only the leading hyphen is invalid
+
+
+def test_get_validated_dataset_path_end_with_hyphen():
+    """Test get_validated_dataset_path rejects a dataset name that ends with a hyphen"""
+    with pytest.raises(
+        ValueError,
+        match="Dataset name must be lowercase and use hyphens instead of spaces",
+    ):
+        get_validated_dataset_path("my-org/invalid-dataset-")  # lowercase, so only the trailing hyphen is invalid
+
+
 @pytest.fixture
 def mock_dataset_loader():
     with patch("pandasai.cli.main.DatasetLoader") as mock:

diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py
@@ -120,7 +120,7 @@ def test_load_valid_dataset(
         mock_find_project_root.return_value = os.path.join("mock", "root")
         mock_exists.return_value = True
 
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
         result = pandasai.load(dataset_path)
 
         # Verify the class method was called
@@ -138,7 +138,7 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file)
         pandasai.get_pandaai_session.return_value = MagicMock()
         mock_request_session.get.return_value.status_code = 404
 
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
 
         with pytest.raises(DatasetNotFound):
             pandasai.load(dataset_path)
@@ -154,11 +154,11 @@ def test_load_missing_not_found_locally_and_no_remote_key(
         mock_response = MagicMock()
         mock_response.status_code = 404
         mock_session.return_value.get.return_value = mock_response
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
 
         with pytest.raises(
             PandaAIApiKeyError,
-            match='The dataset "org/dataset_name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.',
+            match='The dataset "org/dataset-name" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.',
         ):
             pandasai.load(dataset_path)
 
@@ -167,7 +167,7 @@ def test_load_missing_not_found_locally_and_no_remote_key(
     def test_load_missing_api_url(self, mock_exists):
         """Test loading when API URL is missing."""
         mock_exists.return_value = False
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
 
         with pytest.raises(DatasetNotFound):
             pandasai.load(dataset_path)
@@ -181,7 +181,7 @@ def test_load_missing_not_found(self, mock_session, mock_exists):
         mock_response = MagicMock()
         mock_response.status_code = 404
         mock_session.return_value.get.return_value = mock_response
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
 
         with pytest.raises(DatasetNotFound):
             pandasai.load(dataset_path)
@@ -208,7 +208,7 @@ def test_load_successful_zip_extraction(
         mock_request_session.get.return_value.status_code = 200
         mock_request_session.get.return_value.content = b"mock zip content"
 
-        dataset_path = "org/dataset_name"
+        dataset_path = "org/dataset-name"
 
         # Mock the zip file extraction
         mock_zip_file.return_value.__enter__.return_value.extractall = MagicMock()