Giskard-AI · rabah-khalek · Jan 4, 2024 · Jan 2, 2024 · Jan 3, 2024 · Jan 3, 2024
diff --git a/loreal_poc/dataloaders/base.py b/loreal_poc/dataloaders/base.py
@@ -30,12 +30,12 @@ def __len__(self) -> int:
     def get_image(self, idx: int) -> np.ndarray:
         ...
 
-    @property
-    def marks_none(self) -> Optional[np.ndarray]:
+    @classmethod
+    def marks_none(cls) -> Optional[np.ndarray]:
         return None
 
-    @property
-    def meta_none(self) -> Optional[Dict]:
+    @classmethod
+    def meta_none(cls) -> Optional[Dict]:
         return None
 
     def get_marks(self, idx: int) -> Optional[np.ndarray]:
@@ -49,9 +49,9 @@ def __getitem__(
     ) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[Dict[Any, Any]]]:  # (image, marks, meta)
         idx = self.idx_sampler[idx]
         marks = self.get_marks(idx)
-        marks = marks if marks is not None else self.marks_none
+        marks = marks if marks is not None else self.marks_none()
         meta = self.get_meta(idx)
-        meta = meta if meta is not None else self.meta_none
+        meta = meta if meta is not None else self.meta_none()
         return self.get_image(idx), marks, meta
 
     @property
@@ -114,16 +114,21 @@ def __init__(
         collate_fn: Optional[Callable] = None,
     ) -> None:
         super().__init__(name, batch_size=batch_size)
+        # Get the images paths
         images_dir_path = self._get_absolute_local_path(images_dir_path)
-        landmarks_dir_path = self._get_absolute_local_path(landmarks_dir_path)
-
         self.image_paths = self._get_all_paths_based_on_suffix(images_dir_path, self.image_suffix)
-        self.marks_paths = self._get_all_paths_based_on_suffix(landmarks_dir_path, self.marks_suffix)
-        if len(self.marks_paths) != len(self.image_paths):
-            raise ValueError(
-                f"{self.__class__.__name__}: Only {len(self.marks_paths)} found "
-                f"for {len(self.marks_paths)} of the images."
-            )
+
+        self.marks_paths = None
+        # If landmarks folder is not none, we should load them
+        # Else, the get marks method should be overridden
+        if landmarks_dir_path is not None:
+            landmarks_dir_path = self._get_absolute_local_path(landmarks_dir_path)
+            self.marks_paths = self._get_all_paths_based_on_suffix(landmarks_dir_path, self.marks_suffix)
+            if len(self.marks_paths) != len(self.image_paths):
+                raise ValueError(
+                    f"{self.__class__.__name__}: Only {len(self.marks_paths)} found "
+                    f"for {len(self.image_paths)} of the images."
+                )
 
         self.shuffle = shuffle
 
@@ -151,9 +156,7 @@ def _get_absolute_local_path(self, local_path: Union[str, Path]) -> Path:
 
     @classmethod
     def _get_all_paths_based_on_suffix(cls, dir_path: Path, suffix: str) -> List[Path]:
-        all_paths_with_suffix = list(
-            sorted([p for p in dir_path.iterdir() if p.suffix == suffix], key=lambda p: str(p))
-        )
+        all_paths_with_suffix = list(sorted([p for p in dir_path.iterdir() if p.suffix == suffix], key=str))
         if len(all_paths_with_suffix) == 0:
             raise ValueError(
                 f"{cls.__class__.__name__}: Landmarks with suffix {suffix}"
@@ -164,9 +167,9 @@ def _get_all_paths_based_on_suffix(cls, dir_path: Path, suffix: str) -> List[Pat
     def __len__(self) -> int:
         return math.floor(len(self.image_paths) / self.batch_size)
 
-    @property
-    def marks_none(self):
-        return np.full((self.n_landmarks, self.n_landmarks), np.nan)
+    @classmethod
+    def marks_none(cls) -> np.ndarray:
+        return np.full((cls.n_landmarks, cls.n_landmarks), np.nan)
 
     def get_image(self, idx: int) -> np.ndarray:
         return self._load_and_validate_image(self.image_paths[idx])

diff --git a/loreal_poc/dataloaders/loaders.py b/loreal_poc/dataloaders/loaders.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Dict, List, Optional, Union
 
 import cv2
 import numpy as np
@@ -72,34 +72,30 @@ def __init__(
         rng_seed: Optional[int] = None,
         collate_fn: Optional[Callable] = None,
     ) -> None:
-        # TODO!!: super __init__!!
+        super().__init__(
+            images_dir_path=dir_path,
+            landmarks_dir_path=None,
+            name=name,
+            batch_size=batch_size,
+            collate_fn=collate_fn,
+            rng_seed=rng_seed,
+            shuffle=shuffle,
+            meta=None,
+        )
+        with (Path(dir_path) / "ffhq-dataset-meta.json").open(encoding="utf-8") as fp:
+            self.landmarks: Dict[int, List[List[float]]] = {
+                int(k): v["image"]["face_landmarks"] for k, v in json.load(fp).items()
+            }
+
         images_dir_path = self._get_absolute_local_path(dir_path)
         self.image_paths = self._get_all_paths_based_on_suffix(images_dir_path, self.image_suffix)
-        f = open(Path(dir_path) / "ffhq-dataset-meta.json")
-        self.landmarks_data = json.load(f)
-        f.close()
-
-        # TODO: No good
-        self.name = name
-        self.batch_size = batch_size
-        self.shuffle = shuffle
-
-        self.rng = np.random.default_rng(rng_seed)
-
-        self.idx_sampler = list(range(len(self.image_paths)))
-        if shuffle:
-            self.rng.shuffle(self.idx_sampler)
-
-        if collate_fn is not None:
-            self._collate_fn = collate_fn
 
     def get_marks(self, idx: int) -> Optional[np.ndarray]:
-        return np.array(self.landmarks_data[str(idx)]["image"]["face_landmarks"])
+        return np.array(self.landmarks[idx])
 
     def get_meta(self, idx: int) -> Optional[Dict]:
-        f = open(f"ffhq/{idx:05d}.json")
-        meta = json.load(f)
-        f.close()
+        with Path(f"ffhq/{idx:05d}.json").open(encoding="utf-8") as fp:
+            meta = json.load(fp)
         return meta[0]
 
     @classmethod
@@ -114,5 +110,6 @@ def load_image_from_file(cls, image_file: Path) -> np.ndarray:
         """
         return cv2.imread(str(image_file))
 
+    @classmethod
     def load_marks_from_file(cls, mark_file: Path) -> np.ndarray:
-        pass
+        raise NotImplementedError("Should not be called for FFHQ")
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,9 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "loreal-poc"
 version = "2023.11.27"
 description = "Assessing the quality of L'Oreal's facial landmark models"
-authors = [
-    {name = "Rabah Abdul Khalek", email = "rabah@giskard.ai"},
-]
+authors = [{ name = "Rabah Abdul Khalek", email = "rabah@giskard.ai" }]
 dependencies = [
     "pillow>=10.1.0", # just for drawing
     "opencv-python",
@@ -23,20 +21,20 @@ notebook = "jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --notebook-di
 format.cmd = "bash -c 'ruff ./loreal_poc ./tests --fix && black ./loreal_poc ./examples ./tests  && isort ./loreal_poc ./tests'"
 check-format.cmd = "bash -c 'ruff ./loreal_poc ./tests && black --check ./loreal_poc ./examples ./tests && isort --check ./loreal_poc ./tests'"
 test.cmd = "pytest tests/ -c pyproject.toml --disable-warnings -vvv --durations=0"
-check-notebook="bash -c 'cd ./examples && pdm run jupyter nbconvert --to script -y *.ipynb && find . -type f  | grep -e \".py$\" | xargs -I {} echo \"pdm run python {} && echo \"Notebook {} OK\" || exit 1\" | sh'"
+check-notebook = "bash -c 'cd ./examples && pdm run jupyter nbconvert --to script -y *.ipynb && find . -type f  | grep -e \".py$\" | sort | xargs -I {} echo \"pdm run python {} && echo \"Notebook {} OK\" || exit 1\" | sh'"
 
 [tool.pdm.dev-dependencies]
 dev = [
     "face-alignment",
-    "opencv-contrib-python", # needed for lbfmodel
+    "opencv-contrib-python",  # needed for lbfmodel
     "notebook",
     "matplotlib",
     "black[jupyter]>=23.7.0",
     "pytest>=7.4.0",
     "pip>=23.2.1",
     "pre-commit>=2.19.0",
     "ruff",
-    "isort"
+    "isort",
 ]
 
 [tool.ruff]
@@ -65,5 +63,6 @@ exclude = '''
     | dist
     | env
     | venv
+    | \.history
 )/
-'''
+'''
diff --git a/tests/dataloaders/test_base.py b/tests/dataloaders/test_base.py
@@ -17,7 +17,7 @@ def __init__(self, name, length: int = 10, batch_size: int = 1):
         self.idx_sampler = list(range(length))
 
     def __len__(self) -> int:
-        return math.ceil(len(self.dataset) / self.batch_size)
+        return math.floor(len(self.dataset) / self.batch_size)
 
     def get_image(self, idx: int) -> np.ndarray:
         return self.dataset[idx]
@@ -37,17 +37,17 @@ def __init__(self, name, length: int = 10, batch_size: int = 1):
         self.idx_sampler = list(range(length))
 
     def __len__(self) -> int:
-        return math.ceil(len(self.dataset) / self.batch_size)
+        return math.floor(len(self.dataset) / self.batch_size)
 
     def get_image(self, idx: int) -> np.ndarray:
         return self.dataset[idx]
 
-    @property
-    def marks_none(self):
+    @classmethod
+    def marks_none(cls):
         return np.full((68, 2), np.nan)
 
-    @property
-    def meta_none(self):
+    @classmethod
+    def meta_none(cls):
         return {"key1": -1, "key2": -1}
 
     def get_marks(self, idx: int) -> np.ndarray | None: