
Commit 769f4f4

minor change for computing frequency prior
1 parent 99f8384 commit 769f4f4

6 files changed: +118 −157 lines

README.md

Lines changed: 14 additions & 1 deletion

@@ -35,6 +35,19 @@ The goal of gathering all these representative methods into a single repo is to
 - [x] Scene Graph Generation Baseline (:balloon: 2019-07-06)
 - [x] Iterative Message Passing (IMP) (:balloon: 2019-07-07)
 - [ ] Multi-level Scene Description Network (MSDN)
-- [x] Neural Motif (Frequency Prior) (:balloon: 2019-07-08)
+- [x] Neural Motif (Frequency Prior Baseline) (:balloon: 2019-07-08)
 - [ ] Neural Motif
 - [ ] Graph R-CNN
+
+## Benchmarking
+
+### Object Detection
+
+backbone | model | #GPUs | batch size | base_lr | lr_decay_step | max_iter | mAP@0.5 | mAP@0.50:0.95
+--------|--------|--------|--------|---------|--------|--------|--------|---------
+Res101 | faster r-cnn | 6 | 6 | 5e-3 | (70k,90k) | 100k | - | -
+
+### Scene Graph Generation
+backbone | model | #GPUs | batch size | base_lr | lr_decay_step | max_iter | sgdet@20 | sgdet@50 | sgdet@100
+--------|--------|--------|--------|---------|--------|--------|--------|---------|---------
+Res101 | vanilla | 6 | 6 | 5e-3 | (70k,90k) | 100k | - | - | -

configs/faster_rcnn_res101.yaml

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 DATASET:
   NAME: "vg"
   MODE: "benchmark"
+  PATH: "datasets/vg_bm"
   TRAIN_BATCH_SIZE: 6
   TEST_BATCH_SIZE: 1
 MODEL:

lib/config/defaults.py

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@
 _C.DATASET = CN()
 _C.DATASET.NAME = "vg"
 _C.DATASET.MODE = "benchmark" # dataset mode, benchmark | 1600-400-400 | 2500-600-400, etc
+_C.DATASET.PATH = "datasets/vg_bm"
 _C.DATASET.LOADER = 'object' # which kind of data loader to use, object | object+attribute | object+attribute+relationship
 _C.DATASET.TRAIN_BATCH_SIZE = 4
 _C.DATASET.TEST_BATCH_SIZE = 4

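Together, the YAML and defaults changes above make the Visual Genome root directory configurable rather than hard-coded in the dataset class. Here is a minimal sketch of how a yacs-style config would consume the new key (the `CN()` idiom in `defaults.py` suggests yacs; the override path below is purely illustrative):

```python
# Sketch only: assumes yacs, mirroring the CN()/_C idiom in defaults.py.
from yacs.config import CfgNode as CN

_C = CN()
_C.DATASET = CN()
_C.DATASET.NAME = "vg"
_C.DATASET.MODE = "benchmark"
_C.DATASET.PATH = "datasets/vg_bm"  # default added by this commit

cfg = _C.clone()
# A YAML file or a command-line key/value pair can now relocate the data:
cfg.merge_from_list(["DATASET.PATH", "/data/vg_bm"])  # illustrative path
assert cfg.DATASET.PATH == "/data/vg_bm"
```

`vg_hdf5.__init__` (next file) then reads `cfg.DATASET.PATH` instead of the literal `"datasets/vg_bm"`.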
lib/data/vg_hdf5.py

Lines changed: 32 additions & 116 deletions

@@ -12,88 +12,46 @@
 from lib.utils.box import bbox_overlaps
 
 class vg_hdf5(Dataset):
-    def __init__(self, cfg, split="train", transforms=None, num_im=-1):
+    def __init__(self, cfg, split="train", transforms=None, num_im=-1, num_val_im=5000,
+                 filter_duplicate_rels=True, filter_non_overlap=True, filter_empty_rels=True):
         assert split == "train" or split == "test", "split must be one of [train, val, test]"
         assert num_im >= -1, "the number of samples must be >= 0"
+
+        self.data_dir = cfg.DATASET.PATH
         self.transforms = transforms
-        self.data_dir = "datasets/vg_bm"
+
+        self.split = split
+        self.filter_non_overlap = filter_non_overlap
+        self.filter_duplicate_rels = filter_duplicate_rels and self.split == 'train'
+
         self.roidb_file = os.path.join(self.data_dir, "VG-SGG.h5")
         self.image_file = os.path.join(self.data_dir, "imdb_1024.h5")
         # read in dataset from a h5 file and a dict (json) file
         assert os.path.exists(self.data_dir), \
             "cannot find folder {}, please download the visual genome data into this folder".format(self.data_dir)
         self.im_h5 = h5py.File(self.image_file, 'r')
-        self.roi_h5 = h5py.File(os.path.join(self.data_dir, "VG-SGG.h5"), 'r')
         self.info = json.load(open(os.path.join(self.data_dir, "VG-SGG-dicts.json"), 'r'))
-
         self.im_refs = self.im_h5['images'] # image data reference
         im_scale = self.im_refs.shape[2]
 
-        print('split=' + split)
-        data_split = self.roi_h5['split'][:]
-
-        self.split = split
-        if split == "train" or split == "test":
-            split_label = 0 if split == "train" else 2
-            split_mask = data_split == split_label # current split
-        else: # -1
-            split_mask = data_split >= 0 # all
-        # get rid of images that do not have box
-        valid_mask = self.roi_h5['img_to_first_box'][:] >= 0
-        valid_mask = np.bitwise_and(split_mask, valid_mask)
-        self.image_index = np.where(valid_mask)[0] # split index
-
-        if num_im > -1:
-            self.image_index = self.image_index[:num_im]
-
-        # override split mask
-        split_mask = np.zeros_like(data_split).astype(bool)
-        split_mask[self.image_index] = True # build a split mask
-        # if use all images
-        self.im_sizes = np.vstack([self.im_h5['image_widths'][split_mask],
-                                   self.im_h5['image_heights'][split_mask]]).transpose()
-
-        # h5 file is in 1-based index
-        self.im_to_first_box = self.roi_h5['img_to_first_box'][split_mask]
-        self.im_to_last_box = self.roi_h5['img_to_last_box'][split_mask]
-        self.all_boxes = self.roi_h5['boxes_%i' % im_scale][:] # will index later
-        self.all_boxes[:, :2] = self.all_boxes[:, :2]
-        assert(np.all(self.all_boxes[:, :2] >= 0)) # sanity check
-        assert(np.all(self.all_boxes[:, 2:] > 0)) # no empty box
-
-        # convert from xc, yc, w, h to x1, y1, x2, y2
-        self.all_boxes[:, :2] = self.all_boxes[:, :2] - self.all_boxes[:, 2:]/2
-        self.all_boxes[:, 2:] = self.all_boxes[:, :2] + self.all_boxes[:, 2:]
-        self.labels = self.roi_h5['labels'][:,0]
-
         # add background class
         self.info['label_to_idx']['__background__'] = 0
         self.class_to_ind = self.info['label_to_idx']
         self.ind_to_classes = sorted(self.class_to_ind, key=lambda k:
                                      self.class_to_ind[k])
         # cfg.ind_to_class = self.ind_to_classes
 
-        # load relation labels
-        self.im_to_first_rel = self.roi_h5['img_to_first_rel'][split_mask]
-        self.im_to_last_rel = self.roi_h5['img_to_last_rel'][split_mask]
-        self._relations = self.roi_h5['relationships'][:]
-        self._relation_predicates = self.roi_h5['predicates'][:,0]
-        assert(self.im_to_first_rel.shape[0] == self.im_to_last_rel.shape[0])
-        assert(self._relations.shape[0] == self._relation_predicates.shape[0]) # sanity check
         self.predicate_to_ind = self.info['predicate_to_idx']
         self.predicate_to_ind['__background__'] = 0
         self.ind_to_predicates = sorted(self.predicate_to_ind, key=lambda k:
                                         self.predicate_to_ind[k])
-
         # cfg.ind_to_predicate = self.ind_to_predicates
 
-
         self.split_mask, self.image_index, self.im_sizes, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
             self.roidb_file, self.image_file,
-            self.split, num_im, num_val_im=5000,
-            filter_empty_rels=True,
-            filter_non_overlap=False and split == "train",
+            self.split, num_im, num_val_im=num_val_im,
+            filter_empty_rels=filter_empty_rels,
+            filter_non_overlap=filter_non_overlap and split == "train",
         )
 
         self.json_category_id_to_contiguous_id = self.class_to_ind

@@ -102,8 +60,6 @@ def __init__(self, cfg, split="train", transforms=None, num_im=-1):
             v: k for k, v in self.json_category_id_to_contiguous_id.items()
         }
 
-        # self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}
-
     @property
     def coco(self):
         """

@@ -142,36 +98,6 @@ def _im_getter(self, idx):
     def __len__(self):
         return len(self.image_index)
 
-    # def __getitem__(self, index):
-    #     """
-    #     get dataset item
-    #     """
-    #     i = index; assert(self.im_to_first_box[i] >= 0)
-    #     # get image
-    #     img = Image.fromarray(self._im_getter(i)); width, height = img.size
-    #
-    #     # get object bounding boxes, labels and relations
-    #     obj_boxes = self.all_boxes[self.im_to_first_box[i]:self.im_to_last_box[i]+1,:]
-    #     obj_labels = self.labels[self.im_to_first_box[i]:self.im_to_last_box[i]+1]
-    #     obj_relations = np.zeros((obj_boxes.shape[0], obj_boxes.shape[0]))
-    #     if self.im_to_first_rel[i] >= 0: # if image has relations
-    #         predicates = self._relation_predicates[self.im_to_first_rel[i]
-    #                                                :self.im_to_last_rel[i]+1]
-    #         obj_idx = self._relations[self.im_to_first_rel[i]
-    #                                   :self.im_to_last_rel[i]+1]
-    #         obj_idx = obj_idx - self.im_to_first_box[i]
-    #         assert(np.all(obj_idx>=0) and np.all(obj_idx<obj_boxes.shape[0])) # sanity check
-    #         for j, p in enumerate(predicates):
-    #             # gt_relations.append([obj_idx[j][0], obj_idx[j][1], p])
-    #             obj_relations[obj_idx[j][0], obj_idx[j][1]] = p
-    #
-    #     target_raw = BoxList(obj_boxes, (width, height), mode="xyxy")
-    #     img, target = self.transforms(img, target_raw)
-    #     target.add_field("labels", torch.from_numpy(obj_labels))
-    #     target.add_field("pred_labels", torch.from_numpy(obj_relations))
-    #     target = target.clip_to_image(remove_empty=False)
-    #     return img, target, index
-
     def __getitem__(self, index):
         """
         get dataset item

@@ -184,6 +110,16 @@ def __getitem__(self, index):
         obj_labels = self.gt_classes[index].copy()
         obj_relation_triplets = self.relationships[index].copy()
 
+        if self.filter_duplicate_rels:
+            # Filter out dupes!
+            assert self.split == 'train'
+            old_size = obj_relation_triplets.shape[0]
+            all_rel_sets = defaultdict(list)
+            for (o0, o1, r) in obj_relation_triplets:
+                all_rel_sets[(o0, o1)].append(r)
+            obj_relation_triplets = [(k[0], k[1], np.random.choice(v)) for k,v in all_rel_sets.items()]
+            obj_relation_triplets = np.array(obj_relation_triplets)
+
         obj_relations = np.zeros((obj_boxes.shape[0], obj_boxes.shape[0]))
 
         for i in range(obj_relation_triplets.shape[0]):

@@ -209,6 +145,15 @@ def get_groundtruth(self, index):
         obj_labels = self.gt_classes[index].copy()
         obj_relation_triplets = self.relationships[index].copy()
 
+        if self.filter_duplicate_rels:
+            # Filter out dupes!
+            assert self.split == 'train'
+            old_size = obj_relation_triplets.shape[0]
+            all_rel_sets = defaultdict(list)
+            for (o0, o1, r) in obj_relation_triplets:
+                all_rel_sets[(o0, o1)].append(r)
+            obj_relation_triplets = [(k[0], k[1], np.random.choice(v)) for k,v in all_rel_sets.items()]
+            obj_relation_triplets = np.array(obj_relation_triplets)
 
         obj_relations = np.zeros((obj_boxes.shape[0], obj_boxes.shape[0]))
 

@@ -229,35 +174,6 @@ def get_img_info(self, img_id):
         w, h = self.im_sizes[img_id, :]
         return {"height": h, "width": w}
 
-    # def get_groundtruth(self, index):
-    #     i = index; assert(self.im_to_first_box[i] >= 0)
-    #     width, height = self.im_sizes[i, :]
-    #     # get object bounding boxes, labels and relations
-    #     obj_boxes = self.all_boxes[self.im_to_first_box[i]:self.im_to_last_box[i]+1,:]
-    #     obj_labels = self.labels[self.im_to_first_box[i]:self.im_to_last_box[i]+1]
-    #     obj_relations = np.zeros((obj_boxes.shape[0], obj_boxes.shape[0]))
-    #     obj_relation_triplets = np.zeros((self.im_to_last_rel[i] - self.im_to_first_rel[i] + 1, 3))
-    #     if self.im_to_first_rel[i] >= 0: # if image has relations
-    #         predicates = self._relation_predicates[self.im_to_first_rel[i]
-    #                                                :self.im_to_last_rel[i]+1]
-    #         obj_idx = self._relations[self.im_to_first_rel[i]
-    #                                   :self.im_to_last_rel[i]+1]
-    #         obj_idx = obj_idx - self.im_to_first_box[i]
-    #         assert(np.all(obj_idx>=0) and np.all(obj_idx<obj_boxes.shape[0])) # sanity check
-    #         for j, p in enumerate(predicates):
-    #             # gt_relations.append([obj_idx[j][0], obj_idx[j][1], p])
-    #             obj_relations[obj_idx[j][0], obj_idx[j][1]] = p
-    #             obj_relation_triplets[j][0] = obj_idx[j][0]
-    #             obj_relation_triplets[j][1] = obj_idx[j][1]
-    #             obj_relation_triplets[j][2] = p
-    #
-    #     target = BoxList(obj_boxes, (width, height), mode="xyxy")
-    #     target.add_field("labels", torch.from_numpy(obj_labels))
-    #     target.add_field("pred_labels", torch.from_numpy(obj_relations))
-    #     target.add_field("relation_labels", torch.from_numpy(obj_relation_triplets))
-    #     target.add_field("difficult", torch.from_numpy(obj_labels).clone().fill_(0))
-    #     return target
-
     def map_class_id_to_class_name(self, class_id):
         return self.ind_to_classes[class_id]
 

@@ -353,7 +269,7 @@ def load_graphs(graphs_file, images_file, mode='train', num_im=-1, num_val_im=0,
 
         if filter_non_overlap:
            assert mode == 'train'
-            inters = bbox_overlaps(torch.from_numpy(boxes_i), torch.from_numpy(boxes_i)).numpy()
+            inters = bbox_overlaps(torch.from_numpy(boxes_i).float(), torch.from_numpy(boxes_i).float()).numpy()
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]

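The two identical hunks in `__getitem__` and `get_groundtruth` add the same step: when a (subject, object) pair carries several annotated predicates, keep exactly one, sampled uniformly at random, and only at training time. A self-contained sketch of that logic with toy triplets (in the dataset class the input comes from `self.relationships[index]`):

```python
# Standalone sketch of the duplicate-relation filtering added above;
# the toy triplets are illustrative.
from collections import defaultdict
import numpy as np

def filter_duplicate_rels(obj_relation_triplets):
    """Keep one randomly sampled predicate per (subject, object) pair."""
    all_rel_sets = defaultdict(list)
    for (o0, o1, r) in obj_relation_triplets:
        all_rel_sets[(o0, o1)].append(r)
    return np.array([(o0, o1, np.random.choice(rs))
                     for (o0, o1), rs in all_rel_sets.items()])

triplets = np.array([[0, 1, 20], [0, 1, 31], [2, 0, 7]])  # pair (0, 1) duplicated
print(filter_duplicate_rels(triplets))  # one triplet per unique pair
```

The `.float()` casts added in `load_graphs` are a dtype fix: the boxes come out of the h5 file as integer arrays, and `bbox_overlaps` presumably needs floating-point tensors for its IoU division, so the inputs are cast before the call.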
lib/model.py

Lines changed: 66 additions & 34 deletions

@@ -31,13 +31,20 @@ def __init__(self, cfg, arguments, local_rank, distributed):
         self.data_loader_train = build_data_loader(cfg, split="train", is_distributed=distributed)
         self.data_loader_test = build_data_loader(cfg, split="test", is_distributed=distributed)
 
+        logger = logging.getLogger("scene_graph_generation.trainer")
+        logger.info("Train data size: {}".format(len(self.data_loader_train.dataset)))
+        logger.info("Test data size: {}".format(len(self.data_loader_test.dataset)))
+
         if not os.path.exists("freq_prior.npy"):
-            freq_prior = self._get_freq_prior()
-            np.save("freq_prior.npy", freq_prior)
-        else:
-            freq_prior = np.load("freq_prior.npy")
+            logger.info("Computing frequency prior matrix...")
+            fg_matrix, bg_matrix = self._get_freq_prior()
+            prob_matrix = fg_matrix.astype(np.float32)
+            prob_matrix[:,:,0] = bg_matrix
 
-        self.freq_prior = freq_prior
+            prob_matrix[:,:,0] += 1
+            prob_matrix /= np.sum(prob_matrix, 2)[:,:,None]
+            # prob_matrix /= float(fg_matrix.max())
+            np.save("freq_prior.npy", prob_matrix)
 
         # build scene graph generation model
         self.scene_parser = build_scene_parser(cfg); self.scene_parser.to(self.device)

@@ -46,34 +53,59 @@ def __init__(self, cfg, arguments, local_rank, distributed):
 
         self.arguments.update(self.extra_checkpoint_data)
 
-    def _get_freq_prior(self):
-        """
-        get the frequency prior for object-pair v.s. predicate
-        """
-        freq_prior = np.zeros((self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
-                               self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
-                               self.cfg.MODEL.ROI_RELATION_HEAD.NUM_CLASSES))
-
-        for i in range(len(self.data_loader_train.dataset)):
-            target = self.data_loader_train.dataset.get_groundtruth(i)
-            boxes = target.bbox
-            overlaps = bbox_overlaps(boxes, boxes)
-            labels = target.get_field("labels")
-            pred_labels = target.get_field("pred_labels")
-            for m in range(pred_labels.size(0)):
-                for n in range(pred_labels.size(1)):
-                    if pred_labels[m, n] > 0:
-                        label_m = labels[m].item()
-                        label_n = labels[n].item()
-                        freq_prior[label_m, label_n][int(pred_labels[m, n].item())] += 1
-                    else:
-                        if overlaps[m, n] > 0 and m != n:
-                            freq_prior[label_m, label_n][0] += 1
-            if i % 20 == 0:
-                print("processing {}/{}".format(i, len(self.data_loader_train.dataset)))
-            if i >= len(self.data_loader_train.dataset):
-                break
-        return freq_prior
+    def _get_freq_prior(self, must_overlap=False):
+
+        fg_matrix = np.zeros((
+            self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
+            self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
+            self.cfg.MODEL.ROI_RELATION_HEAD.NUM_CLASSES
+        ), dtype=np.int64)
+
+        bg_matrix = np.zeros((
+            self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
+            self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES,
+        ), dtype=np.int64)
+
+        for ex_ind in range(len(self.data_loader_train.dataset)):
+            gt_classes = self.data_loader_train.dataset.gt_classes[ex_ind].copy()
+            gt_relations = self.data_loader_train.dataset.relationships[ex_ind].copy()
+            gt_boxes = self.data_loader_train.dataset.gt_boxes[ex_ind].copy()
+
+            # For the foreground, we'll just look at everything
+            o1o2 = gt_classes[gt_relations[:, :2]]
+            for (o1, o2), gtr in zip(o1o2, gt_relations[:,2]):
+                fg_matrix[o1, o2, gtr] += 1
+
+            # For the background, get all of the things that overlap.
+            o1o2_total = gt_classes[np.array(
+                self._box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)]
+            for (o1, o2) in o1o2_total:
+                bg_matrix[o1, o2] += 1
+
+            if ex_ind % 20 == 0:
+                print("processing {}/{}".format(ex_ind, len(self.data_loader_train.dataset)))
+
+        return fg_matrix, bg_matrix
+
+    def _box_filter(self, boxes, must_overlap=False):
+        """ Only include boxes that overlap as possible relations.
+        If no overlapping boxes, use all of them."""
+        n_cands = boxes.shape[0]
+
+        overlaps = bbox_overlaps(torch.from_numpy(boxes.astype(np.float)), torch.from_numpy(boxes.astype(np.float))).numpy() > 0
+        np.fill_diagonal(overlaps, 0)
+
+        all_possib = np.ones_like(overlaps, dtype=np.bool)
+        np.fill_diagonal(all_possib, 0)
+
+        if must_overlap:
+            possible_boxes = np.column_stack(np.where(overlaps))
+
+            if possible_boxes.size == 0:
+                possible_boxes = np.column_stack(np.where(all_possib))
+        else:
+            possible_boxes = np.column_stack(np.where(all_possib))
+        return possible_boxes
 
     def train(self):
         """

@@ -260,7 +292,7 @@ def test(self, timer=None, visualize=False):
                                      predictions=predictions,
                                      output_folder=output_folder,
                                      **extra_args)
-
+
         if self.cfg.MODEL.RELATION_ON:
             eval_sg_results = evaluate_sg(dataset=self.data_loader_test.dataset,
                                           predictions=predictions,

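The refactored `_get_freq_prior` now returns raw counts: `fg_matrix[s, o, p]` counts how often predicate `p` links a subject of class `s` to an object of class `o` in the training annotations, while `bg_matrix[s, o]` counts candidate object pairs from `_box_filter` (all distinct ordered pairs by default, only overlapping ones when `must_overlap=True`, with a fall-back to all pairs if nothing overlaps). The constructor then folds the background counts into predicate channel 0 (the `__background__` predicate), adds 1 to that channel as smoothing, and normalizes along the predicate axis. A toy-sized sketch of that normalization (the 3 classes and 4 predicates are illustrative stand-ins for the config's `NUM_CLASSES` values):

```python
# Toy-shape sketch of the prior normalization done in __init__ above;
# sizes are illustrative stand-ins for the config's NUM_CLASSES values.
import numpy as np

num_obj_cls, num_pred_cls = 3, 4
rng = np.random.default_rng(0)
fg_matrix = rng.integers(0, 5, size=(num_obj_cls, num_obj_cls, num_pred_cls))
bg_matrix = rng.integers(0, 5, size=(num_obj_cls, num_obj_cls))

prob_matrix = fg_matrix.astype(np.float32)
prob_matrix[:, :, 0] = bg_matrix      # channel 0 is the __background__ predicate
prob_matrix[:, :, 0] += 1             # smoothing: every pair keeps background mass
prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]  # normalize over predicates

assert np.allclose(prob_matrix.sum(axis=2), 1.0)  # each pair is a distribution
```

Note that the constructor no longer keeps a `self.freq_prior` attribute in memory: the normalized matrix is only written to `freq_prior.npy`, and when that file already exists nothing is recomputed or loaded here.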