diff --git a/vicinity/backends/__init__.py b/vicinity/backends/__init__.py index df33411..6c5720c 100644 --- a/vicinity/backends/__init__.py +++ b/vicinity/backends/__init__.py @@ -7,7 +7,7 @@ class OptionalDependencyError(ImportError): def __init__(self, backend: Backend, extra: str) -> None: - msg = f"{backend} requires extra '{extra}'.\n" f"Install it with: pip install 'vicinity[{extra}]'\n" + msg = f"{backend} requires extra '{extra}'.\nInstall it with: pip install 'vicinity[{extra}]'\n" super().__init__(msg) self.backend = backend self.extra = extra diff --git a/vicinity/backends/annoy.py b/vicinity/backends/annoy.py index ebfbf58..82b738a 100644 --- a/vicinity/backends/annoy.py +++ b/vicinity/backends/annoy.py @@ -85,24 +85,23 @@ def __len__(self) -> int: return self.length @classmethod - def load(cls: type[AnnoyBackend], base_path: Path) -> AnnoyBackend: + def load(cls: type[AnnoyBackend], path: Path) -> AnnoyBackend: """Load the vectors from a path.""" - path = Path(base_path) / "index.bin" + index_path = path / "index.bin" - arguments = AnnoyArgs.load(base_path / "arguments.json") + arguments = AnnoyArgs.load(path / "arguments.json") metric = cls._map_metric_to_string(arguments.metric) index = AnnoyIndex(arguments.dim, metric) # type: ignore - index.load(str(path)) + index.load(str(index_path)) return cls(index, arguments=arguments) - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors to a path.""" - path = Path(base_path) / "index.bin" - self.index.save(str(path)) + self.index.save(str(path / "index.bin")) # Ensure the length is set before saving self.arguments.length = len(self) - self.arguments.dump(base_path / "arguments.json") + self.arguments.dump(path / "arguments.json") def query(self, vectors: npt.NDArray, k: int) -> QueryResult: """Query the backend.""" diff --git a/vicinity/backends/base.py b/vicinity/backends/base.py index e3dada3..d954ba3 100644 --- a/vicinity/backends/base.py +++ b/vicinity/backends/base.py @@ -77,7 +77,7 @@ def load(cls: type[BaseType], path: Path) -> BaseType: raise NotImplementedError() @abstractmethod - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the backend to a file.""" raise NotImplementedError() diff --git a/vicinity/backends/basic.py b/vicinity/backends/basic.py index ccae751..c961bc6 100644 --- a/vicinity/backends/basic.py +++ b/vicinity/backends/basic.py @@ -50,25 +50,25 @@ def delete(self, indices: list[int]) -> None: self._vectors = np.delete(self._vectors, indices, axis=0) self._update_precomputed_data() - def save(self, folder: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors to a path.""" - path = folder / "vectors.npy" + path = path / "vectors.npy" with open(path, "wb") as f: np.save(f, self._vectors) @staticmethod - def _load_vectors(folder: Path) -> npt.NDArray: + def _load_vectors(path: Path) -> npt.NDArray: """Load the vectors from a path.""" - path = folder / "vectors.npy" + path = path / "vectors.npy" with open(path, "rb") as f: vectors = np.load(f) return vectors @classmethod - def load(cls, folder: Path) -> BasicVectorStore: + def load(cls, path: Path) -> BasicVectorStore: """Load the vectors from a path.""" - vectors = cls._load_vectors(folder) + vectors = cls._load_vectors(path) return cls(vectors=vectors) @property @@ -130,10 +130,10 @@ def from_vectors(cls, vectors: npt.NDArray, metric: str | Metric = "cosine", **k raise ValueError(f"Unsupported metric: {metric}") @classmethod - def load(cls, folder: Path) -> BasicBackend: + def load(cls, path: Path) -> BasicBackend: """Load the vectors from a path.""" - arguments = BasicArgs.load(folder / "arguments.json") - vectors = cls._load_vectors(folder) + arguments = BasicArgs.load(path / "arguments.json") + vectors = cls._load_vectors(path) if arguments.metric == Metric.COSINE: return CosineBasicBackend(vectors, arguments) elif arguments.metric == Metric.EUCLIDEAN: @@ -141,10 +141,10 @@ def load(cls, folder: Path) -> BasicBackend: else: raise ValueError(f"Unsupported metric: {arguments.metric}") - def save(self, folder: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors to a path.""" - super().save(folder) - self.arguments.dump(folder / "arguments.json") + super().save(path) + self.arguments.dump(path / "arguments.json") def threshold( self, diff --git a/vicinity/backends/faiss.py b/vicinity/backends/faiss.py index 81ba1dd..2b3c6da 100644 --- a/vicinity/backends/faiss.py +++ b/vicinity/backends/faiss.py @@ -191,14 +191,14 @@ def threshold(self, vectors: npt.NDArray, threshold: float, max_k: int) -> Query return out - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the FAISS index and arguments.""" - faiss.write_index(self.index, str(base_path / "index.faiss")) - self.arguments.dump(base_path / "arguments.json") + faiss.write_index(self.index, str(path / "index.faiss")) + self.arguments.dump(path / "arguments.json") @classmethod - def load(cls: type[FaissBackend], base_path: Path) -> FaissBackend: + def load(cls: type[FaissBackend], path: Path) -> FaissBackend: """Load a FAISS index and arguments.""" - arguments = FaissArgs.load(base_path / "arguments.json") - index = faiss.read_index(str(base_path / "index.faiss")) + arguments = FaissArgs.load(path / "arguments.json") + index = faiss.read_index(str(path / "index.faiss")) return cls(index=index, arguments=arguments) diff --git a/vicinity/backends/hnsw.py b/vicinity/backends/hnsw.py index 8cd3e2f..7f65006 100644 --- a/vicinity/backends/hnsw.py +++ b/vicinity/backends/hnsw.py @@ -76,20 +76,20 @@ def __len__(self) -> int: return self.index.get_current_count() @classmethod - def load(cls: type[HNSWBackend], base_path: Path) -> HNSWBackend: + def load(cls: type[HNSWBackend], path: Path) -> HNSWBackend: """Load the vectors from a path.""" - path = Path(base_path) / "index.bin" - arguments = HNSWArgs.load(base_path / "arguments.json") + index_path = path / "index.bin" + arguments = HNSWArgs.load(path / "arguments.json") mapped_metric = cls.inverse_metric_mapping[arguments.metric] index = HnswIndex(space=mapped_metric, dim=arguments.dim) - index.load_index(str(path)) + index.load_index(str(index_path)) return cls(index, arguments=arguments) - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors to a path.""" - path = Path(base_path) / "index.bin" - self.index.save_index(str(path)) - self.arguments.dump(base_path / "arguments.json") + index_path = path / "index.bin" + self.index.save_index(str(index_path)) + self.arguments.dump(path / "arguments.json") def query(self, vectors: npt.NDArray, k: int) -> QueryResult: """Query the backend.""" diff --git a/vicinity/backends/pynndescent.py b/vicinity/backends/pynndescent.py index cbf44e9..0b398e9 100644 --- a/vicinity/backends/pynndescent.py +++ b/vicinity/backends/pynndescent.py @@ -90,25 +90,25 @@ def threshold(self, vectors: npt.NDArray, threshold: float, max_k: int) -> Query out.append((idx[mask], dist[mask])) return out - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors and configuration to a specified path.""" - self.arguments.dump(base_path / "arguments.json") - np.save(Path(base_path) / "vectors.npy", self.index._raw_data) + self.arguments.dump(path / "arguments.json") + np.save(Path(path) / "vectors.npy", self.index._raw_data) # Optionally save the neighbor graph if it exists and needs to be reused if hasattr(self.index, "_neighbor_graph"): - np.save(Path(base_path / "neighbor_graph.npy"), self.index._neighbor_graph) + np.save(Path(path / "neighbor_graph.npy"), self.index._neighbor_graph) @classmethod - def load(cls: type[PyNNDescentBackend], base_path: Path) -> PyNNDescentBackend: + def load(cls: type[PyNNDescentBackend], path: Path) -> PyNNDescentBackend: """Load the vectors and configuration from a specified path.""" - arguments = PyNNDescentArgs.load(base_path / "arguments.json") - vectors = np.load(Path(base_path) / "vectors.npy") + arguments = PyNNDescentArgs.load(path / "arguments.json") + vectors = np.load(Path(path) / "vectors.npy") index = NNDescent(vectors, n_neighbors=arguments.n_neighbors, metric=arguments.metric.value) # Load the neighbor graph if it was saved - neighbor_graph_path = base_path / "neighbor_graph.npy" + neighbor_graph_path = path / "neighbor_graph.npy" if neighbor_graph_path.exists(): index._neighbor_graph = np.load(str(neighbor_graph_path), allow_pickle=True) diff --git a/vicinity/backends/usearch.py b/vicinity/backends/usearch.py index 8e51858..55c3ead 100644 --- a/vicinity/backends/usearch.py +++ b/vicinity/backends/usearch.py @@ -92,10 +92,10 @@ def __len__(self) -> int: return len(self.index) @classmethod - def load(cls: type[UsearchBackend], base_path: Path) -> UsearchBackend: + def load(cls: type[UsearchBackend], path: Path) -> UsearchBackend: """Load the index from a path.""" - path = Path(base_path) / "index.usearch" - arguments = UsearchArgs.load(base_path / "arguments.json") + index_path = path / "index.usearch" + arguments = UsearchArgs.load(path / "arguments.json") index = UsearchIndex( ndim=arguments.dim, @@ -104,14 +104,13 @@ def load(cls: type[UsearchBackend], base_path: Path) -> UsearchBackend: expansion_add=arguments.expansion_add, expansion_search=arguments.expansion_search, ) - index.load(str(path)) + index.load(str(index_path)) return cls(index, arguments=arguments) - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the index to a path.""" - path = Path(base_path) / "index.usearch" - self.index.save(str(path)) - self.arguments.dump(base_path / "arguments.json") + self.index.save(str(path / "index.usearch")) + self.arguments.dump(path / "arguments.json") def query(self, vectors: npt.NDArray, k: int) -> QueryResult: """Query the backend and return results as tuples of keys and distances.""" diff --git a/vicinity/backends/voyager.py b/vicinity/backends/voyager.py index 23ca4d8..86f18be 100644 --- a/vicinity/backends/voyager.py +++ b/vicinity/backends/voyager.py @@ -66,26 +66,26 @@ def from_vectors( VoyagerArgs(dim=dim, metric=metric_enum, ef_construction=ef_construction, m=m), ) - def query(self, query: npt.NDArray, k: int) -> QueryResult: + def query(self, vectors: npt.NDArray, k: int) -> QueryResult: """Query the backend for the nearest neighbors.""" k = min(k, len(self)) - indices, distances = self.index.query(query, k) + indices, distances = self.index.query(vectors, k) return list(zip(indices, distances)) @classmethod - def load(cls: type[VoyagerBackend], base_path: Path) -> VoyagerBackend: + def load(cls: type[VoyagerBackend], path: Path) -> VoyagerBackend: """Load the vectors from a path.""" - path = Path(base_path) / "index.bin" - arguments = VoyagerArgs.load(base_path / "arguments.json") - with open(path, "rb") as f: + index_path = path / "index.bin" + arguments = VoyagerArgs.load(path / "arguments.json") + with open(index_path, "rb") as f: index = Index.load(f) return cls(index, arguments=arguments) - def save(self, base_path: Path) -> None: + def save(self, path: Path) -> None: """Save the vectors to a path.""" - path = Path(base_path) / "index.bin" - self.index.save(str(path)) - self.arguments.dump(base_path / "arguments.json") + index_path = path / "index.bin" + self.index.save(str(index_path)) + self.arguments.dump(path / "arguments.json") def insert(self, vectors: npt.NDArray) -> None: """Insert vectors into the backend.""" diff --git a/vicinity/integrations/huggingface.py b/vicinity/integrations/huggingface.py index a45636f..ded4e3a 100644 --- a/vicinity/integrations/huggingface.py +++ b/vicinity/integrations/huggingface.py @@ -55,6 +55,8 @@ def push_to_hub( if vector_store is not None: if isinstance(vector_store.vectors, np.ndarray): vectors: list[list[float]] = vector_store.vectors.tolist() + else: + vectors = vector_store.vectors dataset_dict["vectors"] = vectors dataset = Dataset.from_dict(dataset_dict) diff --git a/vicinity/vicinity.py b/vicinity/vicinity.py index b1334e0..6c1fd9e 100644 --- a/vicinity/vicinity.py +++ b/vicinity/vicinity.py @@ -48,7 +48,7 @@ def __init__( """ if len(items) != len(backend): raise ValueError( - "Your vector space and list of items are not the same length: " f"{len(backend)} != {len(items)}" + f"Your vector space and list of items are not the same length: {len(backend)} != {len(items)}" ) self.items: list[T] = list(items) self.backend: AbstractBackend = backend @@ -107,7 +107,7 @@ def dim(self) -> int: return self.backend.dim @property - def metric(self) -> str: + def metric(self) -> Metric: """The metric used by the backend.""" return self.backend.arguments.metric