Skip to content
43 changes: 23 additions & 20 deletions loreal_poc/dataloaders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ def __len__(self) -> int:
def get_image(self, idx: int) -> np.ndarray:
...

@property
def marks_none(self) -> Optional[np.ndarray]:
@classmethod
def marks_none(cls) -> Optional[np.ndarray]:
return None

@property
def meta_none(self) -> Optional[Dict]:
@classmethod
def meta_none(cls) -> Optional[Dict]:
return None

def get_marks(self, idx: int) -> Optional[np.ndarray]:
Expand All @@ -49,9 +49,9 @@ def __getitem__(
) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[Dict[Any, Any]]]: # (image, marks, meta)
idx = self.idx_sampler[idx]
marks = self.get_marks(idx)
marks = marks if marks is not None else self.marks_none
marks = marks if marks is not None else self.marks_none()
meta = self.get_meta(idx)
meta = meta if meta is not None else self.meta_none
meta = meta if meta is not None else self.meta_none()
return self.get_image(idx), marks, meta

@property
Expand Down Expand Up @@ -114,16 +114,21 @@ def __init__(
collate_fn: Optional[Callable] = None,
) -> None:
super().__init__(name, batch_size=batch_size)
# Get the images paths
images_dir_path = self._get_absolute_local_path(images_dir_path)
landmarks_dir_path = self._get_absolute_local_path(landmarks_dir_path)

self.image_paths = self._get_all_paths_based_on_suffix(images_dir_path, self.image_suffix)
self.marks_paths = self._get_all_paths_based_on_suffix(landmarks_dir_path, self.marks_suffix)
if len(self.marks_paths) != len(self.image_paths):
raise ValueError(
f"{self.__class__.__name__}: Only {len(self.marks_paths)} found "
f"for {len(self.marks_paths)} of the images."
)

self.marks_paths = None
# If the landmarks folder is not None, load the marks from it.
# Otherwise, the get_marks method must be overridden.
if landmarks_dir_path is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

landmarks_dir_path is a mandatory parameter: since we only handle the "supervised" case, it should never be None. In fact, we should raise an error if it is.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm doing this to allow extensions such as FFHQ (i.e., to provide another way to load the marks).

It's not the cleanest approach, but it makes the class easier to extend.

landmarks_dir_path = self._get_absolute_local_path(landmarks_dir_path)
self.marks_paths = self._get_all_paths_based_on_suffix(landmarks_dir_path, self.marks_suffix)
if len(self.marks_paths) != len(self.image_paths):
raise ValueError(
f"{self.__class__.__name__}: Only {len(self.marks_paths)} found "
f"for {len(self.marks_paths)} of the images."
)

self.shuffle = shuffle

Expand Down Expand Up @@ -151,9 +156,7 @@ def _get_absolute_local_path(self, local_path: Union[str, Path]) -> Path:

@classmethod
def _get_all_paths_based_on_suffix(cls, dir_path: Path, suffix: str) -> List[Path]:
all_paths_with_suffix = list(
sorted([p for p in dir_path.iterdir() if p.suffix == suffix], key=lambda p: str(p))
)
all_paths_with_suffix = list(sorted([p for p in dir_path.iterdir() if p.suffix == suffix], key=str))
if len(all_paths_with_suffix) == 0:
raise ValueError(
f"{cls.__class__.__name__}: Landmarks with suffix {suffix}"
Expand All @@ -164,9 +167,9 @@ def _get_all_paths_based_on_suffix(cls, dir_path: Path, suffix: str) -> List[Pat
def __len__(self) -> int:
return math.floor(len(self.image_paths) / self.batch_size)

@property
def marks_none(self):
return np.full((self.n_landmarks, self.n_landmarks), np.nan)
@classmethod
def marks_none(cls) -> np.ndarray:
return np.full((cls.n_landmarks, cls.n_landmarks), np.nan)

def get_image(self, idx: int) -> np.ndarray:
return self._load_and_validate_image(self.image_paths[idx])
Expand Down
45 changes: 21 additions & 24 deletions loreal_poc/dataloaders/loaders.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from pathlib import Path
from typing import Callable, Dict, Optional, Union
from typing import Callable, Dict, List, Optional, Union

import cv2
import numpy as np
Expand Down Expand Up @@ -72,34 +72,30 @@ def __init__(
rng_seed: Optional[int] = None,
collate_fn: Optional[Callable] = None,
) -> None:
# TODO!!: super __init__!!
super().__init__(
images_dir_path=dir_path,
landmarks_dir_path=None,
name=name,
batch_size=batch_size,
collate_fn=collate_fn,
rng_seed=rng_seed,
shuffle=shuffle,
meta=None,
)
with (Path(dir_path) / "ffhq-dataset-meta.json").open(encoding="utf-8") as fp:
self.landmarks: Dict[int, List[List[float]]] = {
int(k): v["image"]["face_landmarks"] for k, v in json.load(fp).items()
}

images_dir_path = self._get_absolute_local_path(dir_path)
self.image_paths = self._get_all_paths_based_on_suffix(images_dir_path, self.image_suffix)
f = open(Path(dir_path) / "ffhq-dataset-meta.json")
self.landmarks_data = json.load(f)
f.close()

# TODO: No good
self.name = name
self.batch_size = batch_size
self.shuffle = shuffle

self.rng = np.random.default_rng(rng_seed)

self.idx_sampler = list(range(len(self.image_paths)))
if shuffle:
self.rng.shuffle(self.idx_sampler)

if collate_fn is not None:
self._collate_fn = collate_fn

def get_marks(self, idx: int) -> Optional[np.ndarray]:
return np.array(self.landmarks_data[str(idx)]["image"]["face_landmarks"])
return np.array(self.landmarks[idx])

def get_meta(self, idx: int) -> Optional[Dict]:
f = open(f"ffhq/{idx:05d}.json")
meta = json.load(f)
f.close()
with Path(f"ffhq/{idx:05d}.json").open(encoding="utf-8") as fp:
meta = json.load(fp)
return meta[0]

@classmethod
Expand All @@ -114,5 +110,6 @@ def load_image_from_file(cls, image_file: Path) -> np.ndarray:
"""
return cv2.imread(str(image_file))

@classmethod
def load_marks_from_file(cls, mark_file: Path) -> np.ndarray:
pass
raise NotImplementedError("Should not be called for FFHQ")
13 changes: 6 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ build-backend = "setuptools.build_meta"
name = "loreal-poc"
version = "2023.11.27"
description = "Assessing the quality of L'Oreal's facial landmark models"
authors = [
{name = "Rabah Abdul Khalek", email = "rabah@giskard.ai"},
]
authors = [{ name = "Rabah Abdul Khalek", email = "rabah@giskard.ai" }]
dependencies = [
"pillow>=10.1.0", # just for drawing
"opencv-python",
Expand All @@ -23,20 +21,20 @@ notebook = "jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --notebook-di
format.cmd = "bash -c 'ruff ./loreal_poc ./tests --fix && black ./loreal_poc ./examples ./tests && isort ./loreal_poc ./tests'"
check-format.cmd = "bash -c 'ruff ./loreal_poc ./tests && black --check ./loreal_poc ./examples ./tests && isort --check ./loreal_poc ./tests'"
test.cmd = "pytest tests/ -c pyproject.toml --disable-warnings -vvv --durations=0"
check-notebook="bash -c 'cd ./examples && pdm run jupyter nbconvert --to script -y *.ipynb && find . -type f | grep -e \".py$\" | xargs -I {} echo \"pdm run python {} && echo \"Notebook {} OK\" || exit 1\" | sh'"
check-notebook = "bash -c 'cd ./examples && pdm run jupyter nbconvert --to script -y *.ipynb && find . -type f | grep -e \".py$\" | sort | xargs -I {} echo \"pdm run python {} && echo \"Notebook {} OK\" || exit 1\" | sh'"

[tool.pdm.dev-dependencies]
dev = [
"face-alignment",
"opencv-contrib-python", # needed for lbfmodel
"opencv-contrib-python", # needed for lbfmodel
"notebook",
"matplotlib",
"black[jupyter]>=23.7.0",
"pytest>=7.4.0",
"pip>=23.2.1",
"pre-commit>=2.19.0",
"ruff",
"isort"
"isort",
]

[tool.ruff]
Expand Down Expand Up @@ -65,5 +63,6 @@ exclude = '''
| dist
| env
| venv
| .history
)/
'''
'''
12 changes: 6 additions & 6 deletions tests/dataloaders/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, name, length: int = 10, batch_size: int = 1):
self.idx_sampler = list(range(length))

def __len__(self) -> int:
return math.ceil(len(self.dataset) / self.batch_size)
return math.floor(len(self.dataset) / self.batch_size)

def get_image(self, idx: int) -> np.ndarray:
return self.dataset[idx]
Expand All @@ -37,17 +37,17 @@ def __init__(self, name, length: int = 10, batch_size: int = 1):
self.idx_sampler = list(range(length))

def __len__(self) -> int:
return math.ceil(len(self.dataset) / self.batch_size)
return math.floor(len(self.dataset) / self.batch_size)

def get_image(self, idx: int) -> np.ndarray:
return self.dataset[idx]

@property
def marks_none(self):
@classmethod
def marks_none(cls):
return np.full((68, 2), np.nan)

@property
def meta_none(self):
@classmethod
def meta_none(cls):
return {"key1": -1, "key2": -1}

def get_marks(self, idx: int) -> np.ndarray | None:
Expand Down