added classifier

Askill 2020-10-31 20:36:43 +01:00
parent e9585706b9
commit 03d26b46ca
13 changed files with 2281 additions and 152 deletions

.gitignore

@ -8,3 +8,5 @@ short.mp4
__pycache__/
*.mp4
*.weights

Application/Classifiers/Classifier.py

@ -5,99 +5,57 @@
import numpy as np
import tensorflow as tf
import cv2
import os
import json
from Application.Classifiers.ClassifierInterface import ClassifierInterface
class Classifier(ClassifierInterface):
def __init__(self):
print("1")
self.model_path = "./class1.pb"
self.odapi = DetectorAPI(path_to_ckpt=self.model_path)
self.threshold = 0.6
self.threshold = .5
with open(os.path.join(os.path.dirname(__file__), "coco_map.json")) as file:
mapping = json.load(file)
self.classes = dict()
for element in mapping:
self.classes[element["id"]-1] = element["display_name"]
def detect(self, stream):
cap = cv2.VideoCapture(stream)
img = None
r, img = cap.read()
if img is None:
return img
# scale the image down for faster processing
scale_percent = 60 # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
self.net = cv2.dnn.readNet(os.path.join(os.path.dirname(__file__),"yolov4.weights"),os.path.join(os.path.dirname(__file__),"yolov4.cfg"))
#self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
#self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
self.layer_names = self.net.getLayerNames()
self.outputlayers = [self.layer_names[i[0] - 1] for i in self.net.getUnconnectedOutLayers()]
img = cv2.resize(img, dim)
print("Classifier Initiated")
def tagLayer(self, imgs):
# get the results from the net
boxes, scores, classes, num = self.odapi.process_frame(img)
res = False
for i in range(len(boxes)):
# Class 1 represents human
# draw recognition boxes and return resulting image + true/false
if classes[i] == 1:
if scores[i] > self.threshold:
box = boxes[i]
cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (255, 0, 0), 2)
res = True
return img, res
results = []
for i, contours in enumerate(imgs[19:20]):
#print(i)
for contour in contours:
height,width,channels = contour.shape
dim = max(height, width)
if dim > 320:
img2 = np.zeros(shape=[dim, dim, 3], dtype=np.uint8)
else:
res = False
return img, res
def tagLayers(self, layers):
print("tagging")
# The Detector API can be swapped out as long as the I/O stays the same
# this way you can use a different neural net if you like
class DetectorAPI:
def __init__(self, path_to_ckpt):
self.path_to_ckpt = path_to_ckpt
self.detection_graph = tf.Graph()
with self.detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(self.path_to_ckpt, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
self.default_graph = self.detection_graph.as_default()
self.sess = tf.Session(graph=self.detection_graph)
# Define input and output Tensors for detection_graph
self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
self.detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents the level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
self.detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
self.detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
def process_frame(self, image):
# Expand dimensions since the trained_model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image, axis=0)
# Actual detection.
(boxes, scores, classes, num) = self.sess.run(
[self.detection_boxes, self.detection_scores, self.detection_classes, self.num_detections],
feed_dict={self.image_tensor: image_np_expanded})
im_height, im_width,_ = image.shape
boxes_list = [None for i in range(boxes.shape[1])]
for i in range(boxes.shape[1]):
boxes_list[i] = (
int(boxes[0, i, 0] * im_height),
int(boxes[0, i, 1] * im_width),
int(boxes[0, i, 2] * im_height),
int(boxes[0, i, 3] * im_width)
)
return boxes_list, scores[0].tolist(), [int(x) for x in classes[0].tolist()], int(num[0])
def close(self):
self.sess.close()
self.default_graph.close()
img2 = np.zeros(shape=[320,320, 3], dtype=np.uint8)
img2[:height,:width] = contour
blob = cv2.dnn.blobFromImage(img2,1/256,(320,320),(0,0,0),True,crop=False) #reduce 416 to 320
self.net.setInput(blob)
outs = self.net.forward(self.outputlayers)
for out in outs:
for detection in out:
scores = detection
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > self.threshold:
if self.classes[class_id] not in results:
cv2.imshow("changes x", img2)
cv2.waitKey(10) & 0XFF
results.append(self.classes[class_id])
#print(self.classes[x], score)
return results
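For reference, a minimal, self-contained sketch of the tile-plus-YOLO classification that tagLayer performs above. The helper name classify_tile and the 0.5 threshold are illustrative; net and output_layers would come from cv2.dnn.readNet("yolov4.weights", "yolov4.cfg") and net.getUnconnectedOutLayersNames() (assuming OpenCV 4.x), and the standard YOLO output layout (4 box values, objectness, then class scores) is assumed:

import numpy as np
import cv2

def classify_tile(net, output_layers, class_names, tile, threshold=0.5):
    # Pad the contour crop onto a square black canvas so resizing to 320x320 does not distort it.
    h, w = tile.shape[:2]
    side = max(320, h, w)
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:h, :w] = tile
    # Normalize and swap BGR->RGB, mirroring the blobFromImage call above.
    blob = cv2.dnn.blobFromImage(canvas, 1 / 256, (320, 320), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    labels = set()
    for out in net.forward(output_layers):
        for detection in out:
            scores = detection[5:]           # class scores follow the 4 box values and the objectness score
            class_id = int(np.argmax(scores))
            if scores[class_id] > threshold:
                labels.add(class_names[class_id])
    return labels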

Application/Classifiers/coco_map.json

@ -0,0 +1,185 @@
[
{
"name": "/m/01g317"
,"id": 1
,"display_name": "person"
},
{
"name": "/m/0199g"
,"id": 2
,"display_name": "bicycle"
},
{
"name": "/m/0k4j"
,"id": 3
,"display_name": "car"
},
{
"name": "/m/04_sv"
,"id": 4
,"display_name": "motorcycle"
},
{
"name": "/m/05czz6l"
,"id": 5
,"display_name": "airplane"
},
{
"name": "/m/01bjv"
,"id": 6
,"display_name": "bus"
},
{
"name": "/m/07jdr"
,"id": 7
,"display_name": "train"
},
{
"name": "/m/07r04"
,"id": 8
,"display_name": "truck"
},
{
"name": "/m/019jd"
,"id": 9
,"display_name": "boat"
},
{
"name": "/m/015qff"
,"id": 10
,"display_name": "traffic light"
},
{
"name": "/m/01pns0"
,"id": 11
,"display_name": "fire hydrant"
},
{
"name": "/m/02pv19"
,"id": 13
,"display_name": "stop sign"
},
{
"name": "/m/015qbp"
,"id": 14
,"display_name": "parking meter"
},
{
"name": "/m/0cvnqh"
,"id": 15
,"display_name": "bench"
},
{
"name": "/m/015p6"
,"id": 16
,"display_name": "bird"
},
{
"name": "/m/01yrx"
,"id": 17
,"display_name": "cat"
},
{
"name": "/m/0bt9lr"
,"id": 18
,"display_name": "dog"
},
{
"name": "/m/03k3r"
,"id": 19
,"display_name": "horse"
},
{
"name": "/m/07bgp"
,"id": 20
,"display_name": "sheep"
},
{
"name": "/m/01xq0k1"
,"id": 21
,"display_name": "cow"
},
{
"name": "/m/0bwd_0j"
,"id": 22
,"display_name": "elephant"
},
{
"name": "/m/01dws"
,"id": 23
,"display_name": "bear"
},
{
"name": "/m/0898b"
,"id": 24
,"display_name": "zebra"
},
{
"name": "/m/03bk1"
,"id": 25
,"display_name": "giraffe"
},
{
"name": "/m/01940j"
,"id": 27
,"display_name": "backpack"
},
{
"name": "/m/0hnnb"
,"id": 28
,"display_name": "umbrella"
},
{
"name": "/m/080hkjn"
,"id": 31
,"display_name": "handbag"
},
{
"name": "/m/01s55n"
,"id": 33
,"display_name": "suitcase"
},
{
"name": "/m/03g8mr"
,"id": 39
,"display_name": "baseball bat"
},
{
"name": "/m/06_fw"
,"id": 41
,"display_name": "skateboard"
},
{
"name": "/m/01mzpv"
,"id": 62
,"display_name": "chair"
},
{
"name": "/m/02crq1"
,"id": 63
,"display_name": "couch"
},
{
"name": "/m/03ssj5"
,"id": 65
,"display_name": "bed"
},
{
"name": "/m/04bcr3"
,"id": 67
,"display_name": "dining table"
},
{
"name": "/m/07c52"
,"id": 72
,"display_name": "tv"
},
{
"name": "/m/01c648"
,"id": 73
,"display_name": "laptop"
}
]

Application/Classifiers/yolov4.cfg

@ -0,0 +1,789 @@
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=16
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

File diff suppressed because it is too large

Application/Config.py

@ -12,7 +12,7 @@ class Config:
"tolerance": 20,
"maxLength": None,
"ttolerance": 60,
"videoBufferLength": 128,
"videoBufferLength": 16,
"noiseThreashold": 0.1,
"noiseSensitivity": 3/4,
"LayersPerContour": 5,
@ -20,6 +20,7 @@ class Config:
}
def __init__(self):
'''This is basically just a wrapper for a json / python dict'''
print("Current Config:", self.c)
def __getitem__(self, key):
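Since Config is just a thin wrapper around a Python dict, usage stays dict-like; a short hedged sketch (keys taken from the defaults above, the path is a placeholder):

from Application.Config import Config

config = Config()                             # prints the current defaults on construction
config["inputPath"] = "some_footage.mp4"      # override values like a plain dict
buffer_length = config["videoBufferLength"]   # read them back the same way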

Application/ContourExtractor.py

@ -21,8 +21,8 @@ from Application.Config import Config
class ContourExtractor:
#X = {frame_number: [(contour, (x,y,w,h)), ...], }
#extracedContours = {frame_number: [(contour, (x,y,w,h)), ...], }
# dict with frame numbers as keys and the contour bounds of every contour for that frame
def getextractedContours(self):
return self.extractedContours
@ -44,17 +44,15 @@ class ContourExtractor:
print("ContourExtractor initiated")
def extractContours(self):
extractedContours = dict()
videoReader = VideoReader(self.config)
def extractContours(self):
videoReader = VideoReader(self.config)
videoReader.fillBuffer()
threads = self.config["videoBufferLength"]
self.start = time.time()
# start a bunch of worker threads and let them read from the video reader buffer until the video reader reaches EOF
with ThreadPool(threads) as pool:
while not videoReader.videoEnded():
#FrameCount, frame = videoReader.pop()
if videoReader.buffer.qsize() == 0:
time.sleep(.5)
@ -70,6 +68,7 @@ class ContourExtractor:
def getContours(self, data):
frameCount, frame = data
# wait for the reference frame, which is calculated by averaging some previous frames
while frameCount not in self.averages:
time.sleep(0.1)
firstFrame = self.averages.pop(frameCount, None)
@ -77,6 +76,7 @@ class ContourExtractor:
if frameCount % (60*30) == 0:
print(f"{frameCount/(60*30)} Minutes processed in {round((time.time() - self.start), 2)} each")
self.start = time.time()
gray = self.prepareFrame(frame)
frameDelta = cv2.absdiff(gray, firstFrame)
thresh = cv2.threshold(frameDelta, self.threashold, 255, cv2.THRESH_BINARY)[1]
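The core of getContours is classic frame differencing against an averaged reference frame; a hedged, self-contained sketch of that step (the blur kernel, dilation, threshold of 25 and minimum area of 200 are placeholder values, not the project's config defaults):

import cv2

def get_contour_bounds(frame, reference_gray, threshold=25, min_area=200):
    # grayscale + blur so sensor noise does not register as motion
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (21, 21), 0)
    # difference against the averaged reference frame and binarize
    delta = cv2.absdiff(gray, reference_gray)
    thresh = cv2.threshold(delta, threshold, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.dilate(thresh, None, iterations=2)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # keep only the bounding boxes of contours large enough to be real movement
    return [cv2.boundingRect(c) for c in contours if cv2.contourArea(c) >= min_area]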

Application/Exporter.py

@ -21,7 +21,7 @@ class Exporter:
if raw:
self.exportRawData(layers)
if layered and overlayed:
print("Layered and Individual are mutially exclusive, Individual was choosen automatically")
print("Layered and Individual are mutually exclusive, individual was choosen automatically")
overlayed = False
if layered and not overlayed:
self.exportLayers(layers)

Application/Layer.py

@ -17,6 +17,16 @@ class Layer:
length = None
def __init__(self, startFrame, data, config):
'''returns a Layer object
Layers are collections of contours with a StartFrame,
which is the number of the frame the first contour of
this layer was extracted from
A Contour is a CV2 Contour, which is a y*x*3 rgb numpy array,
but we only care about the corners of the contours.
So we save the bounds (x,y,w,h) in bounds[] and the actual content in data[]
'''
self.startFrame = startFrame
self.lastFrame = startFrame
self.config = config
@ -25,22 +35,28 @@ class Layer:
self.bounds.append([data])
#print("Layer constructed")
def add(self, frameNumber, data):
def add(self, frameNumber, bound):
'''Adds a bound'''
if not self.startFrame + len(self.bounds) < frameNumber:
if len(self.bounds[self.startFrame - frameNumber]) >= 1:
self.bounds[self.startFrame - frameNumber].append(data)
self.bounds[self.startFrame - frameNumber].append(bound)
else:
self.lastFrame = frameNumber
self.bounds.append([data])
self.bounds.append([bound])
self.getLength()
def getLength(self):
return len(self)
def __len__(self):
self.length = len(self.bounds)
return self.length
def fill(self, inputPath, resizeWidth):
'''reads in the contour data, needed for export'''
'''deprecated
Fills the data[] array by iterating over the bounds'''
cap = cv2.VideoCapture(inputPath)
self.data = [None]*len(self.bounds)
@ -57,12 +73,18 @@ class Layer:
cap.release()
def clusterDelete(self):
'''Uses a cluster analysis to remove contours which are not the result of movement'''
org = self.bounds
if len(org) == 1:
return
mapped = []
mapping = []
clusterCount = 1
noiseSensitivity = self.config["noiseSensitivity"]
noiseThreashold = self.config["noiseThreashold"]
# calculates the center of each contour in the 2d bounds[] and saves it in a 1d list
# and saves the 2d indexes in a mapping array
for i, bounds in enumerate(org):
for j, bound in enumerate(bounds):
x = (bound[0] + bound[2]/2) / self.config["w"]
@ -76,6 +98,7 @@ class Layer:
centers = []
kmeans = None
# the loop wouldn't be necessary if the number of clusters were known; since it isn't, the loop tries to optimize it
while True:
kmeans = KMeans(init="random", n_clusters=clusterCount, n_init=5, max_iter=300, random_state=42)
kmeans.fit(mapped)
@ -96,12 +119,21 @@ class Layer:
centers = kmeans.cluster_centers_
break
# transforms the labels array
# new array:
# the index is the cluster id, the array is the id of the contour
# [
# [1,2,3]
# [3,4,5]
# [6,7,8,9]
# ]
classed = [[]]
for i, x in enumerate(list(labels)):
while len(classed) <= x:
classed.append([])
classed[x].append(i)
# calculates the euclidean distance (without the sqrt) of each point in a cluster to the cluster center
dists = []
for num, cen in enumerate(centers):
dist = 0
@ -110,9 +142,10 @@ class Layer:
dist/=len(classed[num])
dists.append(dist*1000)
# copy all contours of the clusters with more movement than the threshold
newContours = [[]]
for i, dis in enumerate(dists):
# copy contours which are spread out, delete rest by not yopying them
# copy contours which are spread out, delete rest by not copying them
if dis > noiseThreashold:
for j in classed[i]:
x, y = mapping[j]
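Condensed into a standalone form, the idea of clusterDelete is: run KMeans over the normalized contour centers and keep only clusters whose members are spread out, i.e. actually move. A hedged sketch (it assumes the cluster count has already been picked, whereas the loop above searches for it; spread_out_clusters is an illustrative name):

import numpy as np
from sklearn.cluster import KMeans

def spread_out_clusters(centers_xy, n_clusters, noise_threshold=0.1):
    # centers_xy: N x 2 array of contour centers, already normalized to [0, 1]
    centers_xy = np.asarray(centers_xy, dtype=float)
    kmeans = KMeans(init="random", n_clusters=n_clusters, n_init=5, max_iter=300, random_state=42)
    labels = kmeans.fit_predict(centers_xy)
    keep = []
    for cluster_id, center in enumerate(kmeans.cluster_centers_):
        members = np.where(labels == cluster_id)[0]
        # mean squared distance to the cluster center, scaled like the original (*1000)
        spread = np.mean(np.sum((centers_xy[members] - center) ** 2, axis=1)) * 1000
        if spread > noise_threshold:
            keep.extend(members.tolist())   # clusters that move around are kept, static noise is dropped
    return keep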

Application/LayerFactory.py

@ -23,6 +23,7 @@ class LayerFactory:
self.extractLayers(data)
def extractLayers(self, data = None):
'''Bundle given contours together into Layer Objects'''
if self.data is None:
if data is None:
print("LayerFactory data was none")
@ -48,8 +49,6 @@ class LayerFactory:
#for x in tmp:
#self.getLayers(x)
return self.layers
def getLayers(self, data):

View File

@ -3,9 +3,10 @@ from Application.Config import Config
from Application.VideoReader import VideoReader
from Application.Exporter import Exporter
from multiprocessing.pool import ThreadPool
from Application.Classifiers.Classifier import Classifier
import cv2
import numpy as np
import time
class LayerManager:
def __init__(self, config, layers):
self.data = {}
@ -17,12 +18,16 @@ class LayerManager:
self.resizeWidth = config["resizeWidth"]
self.footagePath = config["inputPath"]
self.config = config
self.classifier = Classifier()
self.tags = []
print("LayerManager constructed")
def cleanLayers(self):
self.freeMin()
self.sortLayers()
self.cleanLayers()
#self.cleanLayers2()
self.freeMax()
def removeStaticLayers(self):
@ -52,7 +57,7 @@ class LayerManager:
if l.getLength() > self.minLayerLength:
layers.append(l)
self.layers = layers
self.removeStaticLayers()
def freeMax(self):
layers = []
@ -60,39 +65,45 @@ class LayerManager:
if l.getLength() < self.maxLayerLength:
layers.append(l)
self.layers = layers
self.removeStaticLayers()
def fillLayers(self):
def tagLayers(self):
'''Use classifiers to tag all Layers by reading the contour content from the original video, then applying the classifier'''
exporter = Exporter(self.config)
start = time.time()
for i, layer in enumerate(self.layers):
print(f"{round(i/len(self.layers)*100,2)} {round((time.time() - start), 2)}")
start = time.time()
if len(layer.bounds[0]) == 0:
continue
listOfFrames = exporter.makeListOfFrames([layer])
listOfFrames = Exporter(self.config).makeListOfFrames(self.layers)
videoReader = VideoReader(self.config, listOfFrames)
videoReader.fillBuffer()
videoReader = VideoReader(self.config, listOfFrames)
videoReader.fillBuffer()
while not videoReader.videoEnded():
frameCount, frame = videoReader.pop()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
for i, layer in enumerate(self.layers):
if i % 20 == 0:
print(f"filled {int(round(i/len(self.layers),2)*100)}% of all Layers")
if layer.startFrame <= frameCount and layer.startFrame + len(layer.bounds) > frameCount:
data = []
for (x, y, w, h) in layer.bounds[frameCount - layer.startFrame]:
if x is None:
break
factor = videoReader.w / self.resizeWidth
x = int(x * factor)
y = int(y * factor)
w = int(w * factor)
h = int(h * factor)
data.append(np.copy(frame[y:y+h, x:x+w]))
layer.data.append(data)
while not videoReader.videoEnded():
frameCount, frame = videoReader.pop()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
data = []
for (x, y, w, h) in layer.bounds[frameCount - layer.startFrame]:
if x is None:
break
factor = videoReader.w / self.resizeWidth
x = int(x * factor)
y = int(y * factor)
w = int(w * factor)
h = int(h * factor)
data.append(np.copy(frame[y:y+h, x:x+w]))
layer.data.append(data)
tags = self.classifier.tagLayer(layer.data)
print(tags)
self.tags.append(tags)
videoReader.thread.join()
videoReader.thread.join()
def sortLayers(self):
self.layers.sort(key = lambda c:c.startFrame)
def cleanLayers(self):
def cleanLayers2(self):
for layer in self.layers:
layer.clusterDelete()
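For orientation, a hedged sketch of how the manager is driven from main.py below; tagLayers is still commented out there, and 'layers' would come from LayerFactory or the Importer:

from Application.Config import Config
from Application.LayerManager import LayerManager

config = Config()
layerManager = LayerManager(config, layers)   # 'layers': list of Layer objects from LayerFactory/Importer
layerManager.cleanLayers()                    # drop too-short, too-long and static layers
layerManager.tagLayers()                      # read the original frames, crop each bound, run the classifier
print(layerManager.tags)                      # detected class names, collected per tagged layer in self.tags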

Application/VideoReader.py

@ -24,11 +24,11 @@ class VideoReader:
self.buffer = Queue(config["videoBufferLength"])
self.vc = cv2.VideoCapture(videoPath)
self.stopped = False
self.getWH()
if setOfFrames is not None:
self.listOfFrames = sorted(setOfFrames)
def getWH(self):
'''get width and height'''
res, image = self.vc.read()
self.w = image.shape[1]
self.h = image.shape[0]
@ -37,15 +37,11 @@ class VideoReader:
def pop(self):
return self.buffer.get(block=True)
def get(self):
return self.buffer[-1]
def fillBuffer(self):
if self.buffer.full():
print("VideoReader::fillBuffer was called when buffer was full.")
self.endFrame = int(self.vc.get(cv2.CAP_PROP_FRAME_COUNT))
#self.endFrame = 10*60*30
if self.listOfFrames is not None:
self.thread = threading.Thread(target=self.readFramesByList, args=())
else:
@ -57,6 +53,7 @@ class VideoReader:
self.vc.release()
def readFrames(self):
'''Reads video from start to finish'''
while self.lastFrame < self.endFrame:
res, frame = self.vc.read()
if res:
@ -64,9 +61,9 @@ class VideoReader:
self.lastFrame += 1
self.stopped = True
def readFramesByList(self):
'''Reads all frames from a list of frame numbers'''
self.vc.set(1, self.listOfFrames[0])
self.lastFrame = self.listOfFrames[0]
self.endFrame = self.listOfFrames[-1]
@ -76,6 +73,8 @@ class VideoReader:
res, frame = self.vc.read()
if res:
self.buffer.put((self.lastFrame, frame))
else:
print("READING FRAMES IS FALSE")
# since the list is sorted the first element is always the lowest relevant frame number
# [0,1,2,3,32,33,34,35,67,68,69]
self.listOfFrames.pop(0)
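A hedged, standalone sketch of the readFramesByList idea: seek once to the first wanted frame, then read forward and keep only the requested indices, relying on the list being sorted (read_frames_by_list is an illustrative name, not the class method):

import cv2

def read_frames_by_list(video_path, frame_numbers):
    frame_numbers = sorted(frame_numbers)
    vc = cv2.VideoCapture(video_path)
    vc.set(cv2.CAP_PROP_POS_FRAMES, frame_numbers[0])   # jump straight to the first relevant frame
    current = frame_numbers[0]
    wanted = list(frame_numbers)
    while wanted:
        res, frame = vc.read()
        if not res:
            break
        if current == wanted[0]:
            yield current, frame
            wanted.pop(0)   # the list is sorted, so the head is always the next relevant frame number
        current += 1
    vc.release()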

main.py

@ -9,44 +9,39 @@ from Application.Importer import Importer
from Application.VideoReader import VideoReader
from Application.LayerManager import LayerManager
from Application.Classifiers import *
#TODO
# find relevant passages using metrics (yet to be defined) for comparison frames
def demo():
print("startup")
def main():
start = time.time()
config = Config()
config["inputPath"] = os.path.join(os.path.dirname(__file__), "generate test footage/3.mp4")
config["importPath"] = os.path.join(os.path.dirname(__file__), "output/short.txt")
config["inputPath"] = os.path.join(os.path.dirname(__file__), "generate test footage/out.mp4")
#config["importPath"] = os.path.join(os.path.dirname(__file__), "output/short.txt")
config["outputPath"] = os.path.join(os.path.dirname(__file__), "output/short.mp4")
vr = VideoReader(config)
config["w"], config["h"] = vr.getWH()
if config["importPath"] is None:
#ana = Analyzer(config)
#ref = ana.avg
contours = ContourExtractor(config).extractContours()
print("Time consumed extracting: ", time.time() - start)
layerFactory = LayerFactory(config)
layers = layerFactory.extractLayers(contours)
layerManager = LayerManager(config, layers)
layerManager.cleanLayers()
layers = layerManager.layers
else:
layers = Importer(config).importRawData()
layerManager = LayerManager(config, layers)
layerManager.cleanLayers()
#layerManager.tagLayers()
layers = layerManager.layers
exporter = Exporter(config)
exporter.export(layers)
exporter.export(layers, raw=False)
print("Total time: ", time.time() - start)
def init():
print("not needed yet")
if __name__ == "__main__":
demo()
main()