Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vicinity/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class OptionalDependencyError(ImportError):
def __init__(self, backend: Backend, extra: str) -> None:
msg = f"{backend} requires extra '{extra}'.\n" f"Install it with: pip install 'vicinity[{extra}]'\n"
msg = f"{backend} requires extra '{extra}'.\nInstall it with: pip install 'vicinity[{extra}]'\n"
super().__init__(msg)
self.backend = backend
self.extra = extra
Expand Down
15 changes: 7 additions & 8 deletions vicinity/backends/annoy.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,23 @@ def __len__(self) -> int:
return self.length

@classmethod
def load(cls: type[AnnoyBackend], base_path: Path) -> AnnoyBackend:
def load(cls: type[AnnoyBackend], path: Path) -> AnnoyBackend:
"""Load the vectors from a path."""
path = Path(base_path) / "index.bin"
index_path = path / "index.bin"

arguments = AnnoyArgs.load(base_path / "arguments.json")
arguments = AnnoyArgs.load(path / "arguments.json")
metric = cls._map_metric_to_string(arguments.metric)
index = AnnoyIndex(arguments.dim, metric) # type: ignore
index.load(str(path))
index.load(str(index_path))

return cls(index, arguments=arguments)

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors to a path."""
path = Path(base_path) / "index.bin"
self.index.save(str(path))
self.index.save(str(path / "index.bin"))
# Ensure the length is set before saving
self.arguments.length = len(self)
self.arguments.dump(base_path / "arguments.json")
self.arguments.dump(path / "arguments.json")

def query(self, vectors: npt.NDArray, k: int) -> QueryResult:
"""Query the backend."""
Expand Down
2 changes: 1 addition & 1 deletion vicinity/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def load(cls: type[BaseType], path: Path) -> BaseType:
raise NotImplementedError()

@abstractmethod
def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the backend to a file."""
raise NotImplementedError()

Expand Down
24 changes: 12 additions & 12 deletions vicinity/backends/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,25 +50,25 @@ def delete(self, indices: list[int]) -> None:
self._vectors = np.delete(self._vectors, indices, axis=0)
self._update_precomputed_data()

def save(self, folder: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors to a path."""
path = folder / "vectors.npy"
path = path / "vectors.npy"
with open(path, "wb") as f:
np.save(f, self._vectors)

@staticmethod
def _load_vectors(folder: Path) -> npt.NDArray:
def _load_vectors(path: Path) -> npt.NDArray:
"""Load the vectors from a path."""
path = folder / "vectors.npy"
path = path / "vectors.npy"
with open(path, "rb") as f:
vectors = np.load(f)

return vectors

@classmethod
def load(cls, folder: Path) -> BasicVectorStore:
def load(cls, path: Path) -> BasicVectorStore:
"""Load the vectors from a path."""
vectors = cls._load_vectors(folder)
vectors = cls._load_vectors(path)
return cls(vectors=vectors)

@property
Expand Down Expand Up @@ -130,21 +130,21 @@ def from_vectors(cls, vectors: npt.NDArray, metric: str | Metric = "cosine", **k
raise ValueError(f"Unsupported metric: {metric}")

@classmethod
def load(cls, folder: Path) -> BasicBackend:
def load(cls, path: Path) -> BasicBackend:
"""Load the vectors from a path."""
arguments = BasicArgs.load(folder / "arguments.json")
vectors = cls._load_vectors(folder)
arguments = BasicArgs.load(path / "arguments.json")
vectors = cls._load_vectors(path)
if arguments.metric == Metric.COSINE:
return CosineBasicBackend(vectors, arguments)
elif arguments.metric == Metric.EUCLIDEAN:
return EuclideanBasicBackend(vectors, arguments)
else:
raise ValueError(f"Unsupported metric: {arguments.metric}")

def save(self, folder: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors to a path."""
super().save(folder)
self.arguments.dump(folder / "arguments.json")
super().save(path)
self.arguments.dump(path / "arguments.json")

def threshold(
self,
Expand Down
12 changes: 6 additions & 6 deletions vicinity/backends/faiss.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,14 @@ def threshold(self, vectors: npt.NDArray, threshold: float, max_k: int) -> Query

return out

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the FAISS index and arguments."""
faiss.write_index(self.index, str(base_path / "index.faiss"))
self.arguments.dump(base_path / "arguments.json")
faiss.write_index(self.index, str(path / "index.faiss"))
self.arguments.dump(path / "arguments.json")

@classmethod
def load(cls: type[FaissBackend], base_path: Path) -> FaissBackend:
def load(cls: type[FaissBackend], path: Path) -> FaissBackend:
"""Load a FAISS index and arguments."""
arguments = FaissArgs.load(base_path / "arguments.json")
index = faiss.read_index(str(base_path / "index.faiss"))
arguments = FaissArgs.load(path / "arguments.json")
index = faiss.read_index(str(path / "index.faiss"))
return cls(index=index, arguments=arguments)
16 changes: 8 additions & 8 deletions vicinity/backends/hnsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,20 @@ def __len__(self) -> int:
return self.index.get_current_count()

@classmethod
def load(cls: type[HNSWBackend], base_path: Path) -> HNSWBackend:
def load(cls: type[HNSWBackend], path: Path) -> HNSWBackend:
"""Load the vectors from a path."""
path = Path(base_path) / "index.bin"
arguments = HNSWArgs.load(base_path / "arguments.json")
index_path = path / "index.bin"
arguments = HNSWArgs.load(path / "arguments.json")
mapped_metric = cls.inverse_metric_mapping[arguments.metric]
index = HnswIndex(space=mapped_metric, dim=arguments.dim)
index.load_index(str(path))
index.load_index(str(index_path))
return cls(index, arguments=arguments)

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors to a path."""
path = Path(base_path) / "index.bin"
self.index.save_index(str(path))
self.arguments.dump(base_path / "arguments.json")
index_path = path / "index.bin"
self.index.save_index(str(index_path))
self.arguments.dump(path / "arguments.json")

def query(self, vectors: npt.NDArray, k: int) -> QueryResult:
"""Query the backend."""
Expand Down
16 changes: 8 additions & 8 deletions vicinity/backends/pynndescent.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,25 @@ def threshold(self, vectors: npt.NDArray, threshold: float, max_k: int) -> Query
out.append((idx[mask], dist[mask]))
return out

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors and configuration to a specified path."""
self.arguments.dump(base_path / "arguments.json")
np.save(Path(base_path) / "vectors.npy", self.index._raw_data)
self.arguments.dump(path / "arguments.json")
np.save(Path(path) / "vectors.npy", self.index._raw_data)

# Optionally save the neighbor graph if it exists and needs to be reused
if hasattr(self.index, "_neighbor_graph"):
np.save(Path(base_path / "neighbor_graph.npy"), self.index._neighbor_graph)
np.save(Path(path / "neighbor_graph.npy"), self.index._neighbor_graph)

@classmethod
def load(cls: type[PyNNDescentBackend], base_path: Path) -> PyNNDescentBackend:
def load(cls: type[PyNNDescentBackend], path: Path) -> PyNNDescentBackend:
"""Load the vectors and configuration from a specified path."""
arguments = PyNNDescentArgs.load(base_path / "arguments.json")
vectors = np.load(Path(base_path) / "vectors.npy")
arguments = PyNNDescentArgs.load(path / "arguments.json")
vectors = np.load(Path(path) / "vectors.npy")

index = NNDescent(vectors, n_neighbors=arguments.n_neighbors, metric=arguments.metric.value)

# Load the neighbor graph if it was saved
neighbor_graph_path = base_path / "neighbor_graph.npy"
neighbor_graph_path = path / "neighbor_graph.npy"
if neighbor_graph_path.exists():
index._neighbor_graph = np.load(str(neighbor_graph_path), allow_pickle=True)

Expand Down
15 changes: 7 additions & 8 deletions vicinity/backends/usearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ def __len__(self) -> int:
return len(self.index)

@classmethod
def load(cls: type[UsearchBackend], base_path: Path) -> UsearchBackend:
def load(cls: type[UsearchBackend], path: Path) -> UsearchBackend:
"""Load the index from a path."""
path = Path(base_path) / "index.usearch"
arguments = UsearchArgs.load(base_path / "arguments.json")
index_path = path / "index.usearch"
arguments = UsearchArgs.load(path / "arguments.json")

index = UsearchIndex(
ndim=arguments.dim,
Expand All @@ -104,14 +104,13 @@ def load(cls: type[UsearchBackend], base_path: Path) -> UsearchBackend:
expansion_add=arguments.expansion_add,
expansion_search=arguments.expansion_search,
)
index.load(str(path))
index.load(str(index_path))
return cls(index, arguments=arguments)

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the index to a path."""
path = Path(base_path) / "index.usearch"
self.index.save(str(path))
self.arguments.dump(base_path / "arguments.json")
self.index.save(str(path / "index.usearch"))
self.arguments.dump(path / "arguments.json")

def query(self, vectors: npt.NDArray, k: int) -> QueryResult:
"""Query the backend and return results as tuples of keys and distances."""
Expand Down
20 changes: 10 additions & 10 deletions vicinity/backends/voyager.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,26 +66,26 @@ def from_vectors(
VoyagerArgs(dim=dim, metric=metric_enum, ef_construction=ef_construction, m=m),
)

def query(self, query: npt.NDArray, k: int) -> QueryResult:
def query(self, vectors: npt.NDArray, k: int) -> QueryResult:
"""Query the backend for the nearest neighbors."""
k = min(k, len(self))
indices, distances = self.index.query(query, k)
indices, distances = self.index.query(vectors, k)
return list(zip(indices, distances))

@classmethod
def load(cls: type[VoyagerBackend], base_path: Path) -> VoyagerBackend:
def load(cls: type[VoyagerBackend], path: Path) -> VoyagerBackend:
"""Load the vectors from a path."""
path = Path(base_path) / "index.bin"
arguments = VoyagerArgs.load(base_path / "arguments.json")
with open(path, "rb") as f:
index_path = path / "index.bin"
arguments = VoyagerArgs.load(path / "arguments.json")
with open(index_path, "rb") as f:
index = Index.load(f)
return cls(index, arguments=arguments)

def save(self, base_path: Path) -> None:
def save(self, path: Path) -> None:
"""Save the vectors to a path."""
path = Path(base_path) / "index.bin"
self.index.save(str(path))
self.arguments.dump(base_path / "arguments.json")
index_path = path / "index.bin"
self.index.save(str(index_path))
self.arguments.dump(path / "arguments.json")

def insert(self, vectors: npt.NDArray) -> None:
"""Insert vectors into the backend."""
Expand Down
2 changes: 2 additions & 0 deletions vicinity/integrations/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def push_to_hub(
if vector_store is not None:
if isinstance(vector_store.vectors, np.ndarray):
vectors: list[list[float]] = vector_store.vectors.tolist()
else:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

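`vectors` was possibly unbound: it was only assigned when `vector_store.vectors` is an `np.ndarray`, so the new `else` branch assigns `vector_store.vectors` directly before it is stored in `dataset_dict["vectors"]`.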

vectors = vector_store.vectors
dataset_dict["vectors"] = vectors

dataset = Dataset.from_dict(dataset_dict)
Expand Down
4 changes: 2 additions & 2 deletions vicinity/vicinity.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(
"""
if len(items) != len(backend):
raise ValueError(
"Your vector space and list of items are not the same length: " f"{len(backend)} != {len(items)}"
f"Your vector space and list of items are not the same length: {len(backend)} != {len(items)}"
)
self.items: list[T] = list(items)
self.backend: AbstractBackend = backend
Expand Down Expand Up @@ -107,7 +107,7 @@ def dim(self) -> int:
return self.backend.dim

@property
def metric(self) -> str:
def metric(self) -> Metric:
"""The metric used by the backend."""
return self.backend.arguments.metric

Expand Down
Loading