Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chromadb build error #6729

Open
Naheemateon opened this issue Feb 20, 2025 · 2 comments
Open

Chromadb build error #6729

Naheemateon opened this issue Feb 20, 2025 · 2 comments
Labels
bug Something isn't working

Comments

@Naheemateon
Copy link

Bug Description

I am trying to build a RAG app in Langflow. Everything was working fine until this error appeared:

Error building Component Chroma DB: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'C:\Users\nahee'
Duration:
5.39 seconds

Image

Reproduction

from copy import deepcopy

from chromadb.config import Settings
from langchain_chroma import Chroma
from typing_extensions import override

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.base.vectorstores.utils import chroma_collection_to_data
from langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, StrInput
from langflow.schema import Data

class ChromaVectorStoreComponent(LCVectorStoreComponent):
    """Chroma Vector Store with search capabilities.

    Builds a ``langchain_chroma.Chroma`` store. When ``chroma_server_host`` is
    set, a ``chromadb`` client is created from the server inputs and handed to
    Chroma; otherwise Chroma is constructed with ``client=None`` and the
    (optional) ``persist_directory``. Items in ``ingest_data`` are added to the
    store while it is being built.
    """

    display_name: str = "Chroma DB"
    description: str = "Chroma Vector Store with search capabilities"
    name = "Chroma"
    icon = "Chroma"

    inputs = [
        StrInput(
            name="collection_name",
            display_name="Collection Name",
            value="langflow",
        ),
        StrInput(
            name="persist_directory",
            display_name="Persist Directory",
        ),
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
        StrInput(
            name="chroma_server_cors_allow_origins",
            display_name="Server CORS Allow Origins",
            advanced=True,
        ),
        StrInput(
            name="chroma_server_host",
            display_name="Server Host",
            advanced=True,
        ),
        IntInput(
            name="chroma_server_http_port",
            display_name="Server HTTP Port",
            advanced=True,
        ),
        IntInput(
            name="chroma_server_grpc_port",
            display_name="Server gRPC Port",
            advanced=True,
        ),
        BoolInput(
            name="chroma_server_ssl_enabled",
            display_name="Server SSL Enabled",
            advanced=True,
        ),
        BoolInput(
            name="allow_duplicates",
            display_name="Allow Duplicates",
            advanced=True,
            info="If false, will not add documents that are already in the Vector Store.",
        ),
        DropdownInput(
            name="search_type",
            display_name="Search Type",
            options=["Similarity", "MMR"],
            value="Similarity",
            advanced=True,
        ),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            advanced=True,
            value=10,
        ),
        IntInput(
            name="limit",
            display_name="Limit",
            advanced=True,
            info="Limit the number of records to compare when Allow Duplicates is False.",
        ),
    ]

    @override
    @check_cached_vector_store
    def build_vector_store(self) -> Chroma:
        """Build the Chroma store, ingest pending documents and publish its contents.

        Returns:
            The constructed ``Chroma`` instance.

        Raises:
            ImportError: If ``chromadb`` or ``langchain_chroma`` cannot be imported.
        """
        try:
            from chromadb import Client
            from langchain_chroma import Chroma
        except ImportError as e:
            msg = "Could not import Chroma integration package. Please install it with `pip install langchain-chroma`."
            raise ImportError(msg) from e

        # A client is only created when a server host is configured; otherwise
        # Chroma is given client=None.
        client = None
        if self.chroma_server_host:
            chroma_settings = Settings(
                chroma_server_cors_allow_origins=self.chroma_server_cors_allow_origins or [],
                chroma_server_host=self.chroma_server_host,
                chroma_server_http_port=self.chroma_server_http_port or None,
                chroma_server_grpc_port=self.chroma_server_grpc_port or None,
                chroma_server_ssl_enabled=self.chroma_server_ssl_enabled,
            )
            client = Client(settings=chroma_settings)

        # Treat every falsy value (None or "") as "no persist directory" so that
        # resolve_path is never handed an empty path. Non-empty values are passed
        # through resolve_path (presumably expanding relative paths, per the
        # original comment -- confirm against its implementation).
        persist_directory = self.resolve_path(self.persist_directory) if self.persist_directory else None

        chroma = Chroma(
            persist_directory=persist_directory,
            client=client,
            embedding_function=self.embedding,
            collection_name=self.collection_name,
        )

        self._add_documents_to_vector_store(chroma)
        # Expose (up to `limit`) stored records as the component status.
        self.status = chroma_collection_to_data(chroma.get(limit=self.limit))
        return chroma

    def _add_documents_to_vector_store(self, vector_store: "Chroma") -> None:
        """Add the items of ``self.ingest_data`` to ``vector_store``.

        When ``allow_duplicates`` is false, items equal to an already stored
        record (compared with the stored record's ``id`` removed) are skipped.

        Args:
            vector_store: The Chroma store that receives the documents.

        Raises:
            TypeError: If an ingest item is not a ``Data`` object.
        """
        if not self.ingest_data:
            self.status = ""
            return

        # Stored records with their ids stripped, used for the duplicate check.
        # Stays empty when duplicates are allowed, so nothing is filtered out.
        stored_documents_without_id = []
        if not self.allow_duplicates:
            stored_data = chroma_collection_to_data(vector_store.get(limit=self.limit))
            # Work on copies so that deleting `id` does not touch `stored_data`.
            for value in deepcopy(stored_data):
                del value.id
                stored_documents_without_id.append(value)

        documents = []
        for _input in self.ingest_data:
            if not isinstance(_input, Data):
                msg = "Vector Store Inputs must be Data objects."
                raise TypeError(msg)
            if _input not in stored_documents_without_id:
                documents.append(_input.to_lc_document())

        if documents and self.embedding is not None:
            self.log(f"Adding {len(documents)} documents to the Vector Store.")
            vector_store.add_documents(documents)
        else:
            self.log("No documents to add to the Vector Store.")

Expected behavior

Everything is supposed to work perfectly

Who can help?

No response

Operating System

Windows 10

Langflow Version

v1.1.4.post1

Python Version

None

Screenshot

No response

Flow File

No response

@Naheemateon Naheemateon added the bug Something isn't working label Feb 20, 2025
Copy link

dosubot bot commented Feb 20, 2025

@Naheemateon Hello there! I'm here to assist you with any bugs, questions, or contributions. Let's tackle this together!

To resolve the "Error building Component Chroma DB: [WinError 123] The filename, directory name, or volume label syntax is incorrect" on Windows 10, you should check the persist_directory input for any invalid characters or incorrect syntax. This error indicates that the path provided for the persist_directory is not valid according to Windows file system rules.

Ensure that the persist_directory path is correctly formatted and does not contain any illegal characters such as <>:"/\|?*. You can use the resolve_path method to handle relative paths, as shown in the code:

persist_directory = self.resolve_path(self.persist_directory) if self.persist_directory is not None else None

Make sure that the persist_directory is a valid path and does not contain any characters that are not allowed in Windows file paths [1].

To continue talking to Dosu, mention @dosu.


Help Dosu learn! Give it feedback: Great Response | Irrelevant Answer | Incorrect Sources | Too Verbose | Hallucination | Bug Report | Other

@Tk3699
Copy link

Tk3699 commented Feb 20, 2025

Here is a corrected version of this code.
I made the following changes:
I kept the import statements for Client and Chroma inside the try block, and then added a check on persist_directory so that a missing value is skipped, to prevent errors when no path is provided.
This took some effort, but I then enhanced the logic so that data is checked for duplicates only if allow_duplicates is False. I also wrapped the vector_store.add_documents(documents) call in a try-except block to gracefully log any exceptions that arise while adding documents.
Lastly, I adjusted the error message in the TypeError for better clarity.
Here's the code:

# Component metadata.
display_name: str = "Chroma DB"
description: str = "Chroma Vector Store with search capabilities"
name = "Chroma"
icon = "Chroma"

# Input fields of the component, in declaration order.
inputs = [
    # Collection and storage location.
    StrInput(
        name="collection_name",
        display_name="Collection Name",
        value="langflow",
    ),
    StrInput(
        name="persist_directory",
        display_name="Persist Directory",
    ),
    # Inputs shared with the base vector-store component.
    *LCVectorStoreComponent.inputs,
    HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
    # Chroma server connection settings (advanced).
    StrInput(
        name="chroma_server_cors_allow_origins",
        display_name="Server CORS Allow Origins",
        advanced=True,
    ),
    StrInput(
        name="chroma_server_host",
        display_name="Server Host",
        advanced=True,
    ),
    IntInput(
        name="chroma_server_http_port",
        display_name="Server HTTP Port",
        advanced=True,
    ),
    IntInput(
        name="chroma_server_grpc_port",
        display_name="Server gRPC Port",
        advanced=True,
    ),
    BoolInput(
        name="chroma_server_ssl_enabled",
        display_name="Server SSL Enabled",
        advanced=True,
    ),
    # Ingestion and search behaviour (advanced).
    BoolInput(
        name="allow_duplicates",
        display_name="Allow Duplicates",
        advanced=True,
        info="If false, will not add documents that are already in the Vector Store.",
    ),
    DropdownInput(
        name="search_type",
        display_name="Search Type",
        options=["Similarity", "MMR"],
        value="Similarity",
        advanced=True,
    ),
    IntInput(
        name="number_of_results",
        display_name="Number of Results",
        info="Number of results to return.",
        advanced=True,
        value=10,
    ),
    IntInput(
        name="limit",
        display_name="Limit",
        advanced=True,
        info="Limit the number of records to compare when Allow Duplicates is False.",
    ),
]

@override
@check_cached_vector_store
def build_vector_store(self) -> Chroma:
    """Build the Chroma store, ingest pending documents and publish its contents.

    Returns:
        The constructed ``Chroma`` instance.

    Raises:
        ImportError: If ``chromadb`` or ``langchain_chroma`` cannot be imported.
    """
    try:
        from chromadb import Client
        from langchain_chroma import Chroma
    except ImportError as e:
        msg = "Could not import Chroma integration package. Please install it with `pip install langchain-chroma`."
        raise ImportError(msg) from e

    # A client is only created when a server host is configured; otherwise
    # Chroma is given client=None.
    client = None
    if self.chroma_server_host:
        chroma_settings = Settings(
            chroma_server_cors_allow_origins=self.chroma_server_cors_allow_origins or [],
            chroma_server_host=self.chroma_server_host,
            chroma_server_http_port=self.chroma_server_http_port or None,
            chroma_server_grpc_port=self.chroma_server_grpc_port or None,
            chroma_server_ssl_enabled=self.chroma_server_ssl_enabled,
        )
        client = Client(settings=chroma_settings)

    # Any falsy value (None or "") means "no persist directory"; only a
    # non-empty value is passed through resolve_path.
    persist_directory = self.resolve_path(self.persist_directory) if self.persist_directory else None

    chroma = Chroma(
        persist_directory=persist_directory,
        client=client,
        embedding_function=self.embedding,
        collection_name=self.collection_name,
    )

    self._add_documents_to_vector_store(chroma)
    # Expose (up to `limit`) stored records as the component status.
    self.status = chroma_collection_to_data(chroma.get(limit=self.limit))
    return chroma

def _add_documents_to_vector_store(self, vector_store: "Chroma") -> None:
    """Add the items of ``self.ingest_data`` to ``vector_store``.

    When ``allow_duplicates`` is false, items equal to an already stored
    record (compared with the stored record's ``id`` removed) are skipped.
    Failures raised by ``vector_store.add_documents`` are logged, not raised.

    Args:
        vector_store: The Chroma store that receives the documents.

    Raises:
        TypeError: If an ingest item is not a ``Data`` object.
    """
    if not self.ingest_data:
        self.status = ""
        return

    # Stored records with their ids stripped, used for the duplicate check.
    # Stays empty when duplicates are allowed.
    stored_documents_without_id = []
    if not self.allow_duplicates:
        stored_data = chroma_collection_to_data(vector_store.get(limit=self.limit))
        # Work on copies so that deleting `id` does not touch `stored_data`.
        for value in deepcopy(stored_data):
            del value.id
            stored_documents_without_id.append(value)

    documents = []
    for _input in self.ingest_data:
        if not isinstance(_input, Data):
            raise TypeError("Vector Store Inputs must be Data objects.")
        # Check for duplicates if not allowed
        if not self.allow_duplicates and _input in stored_documents_without_id:
            continue
        documents.append(_input.to_lc_document())

    if documents and self.embedding is not None:
        self.log(f"Adding {len(documents)} documents to the Vector Store.")
        # NOTE(review): deliberate best-effort -- an ingestion failure is only
        # logged, so the caller continues as if the documents had been added.
        # Consider re-raising if silent partial ingestion is not acceptable.
        try:
            vector_store.add_documents(documents)
        except Exception as e:
            self.log(f"Error adding documents to the Vector Store: {e}")
    else:
        self.log("No documents to add to the Vector Store.")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants