diff --git a/.gitignore b/.gitignore index ea407d9..bffad8d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ short.mp4 __pycache__/ *.mp4 + +*.weights \ No newline at end of file diff --git a/Application/Classifiers/Classifier.py b/Application/Classifiers/Classifier.py index bca21d3..589c27d 100644 --- a/Application/Classifiers/Classifier.py +++ b/Application/Classifiers/Classifier.py @@ -5,99 +5,57 @@ import numpy as np import tensorflow as tf import cv2 +import os +import json from Application.Classifiers.ClassifierInterface import ClassifierInterface class Classifier(ClassifierInterface): def __init__(self): - print("1") - self.model_path = "./class1.pb" - self.odapi = DetectorAPI(path_to_ckpt=self.model_path) - self.threshold = 0.6 + self.threshold = .5 + with open(os.path.join(os.path.dirname(__file__), "coco_map.json")) as file: + mapping = json.load(file) + self.classes = dict() + for element in mapping: + self.classes[element["id"]-1] = element["display_name"] - def detect(self, stream): - cap = cv2.VideoCapture(stream) - img = None - r, img = cap.read() - if img is None: - return img - # scale the image down for faster processing - scale_percent = 60 # percent of original size - width = int(img.shape[1] * scale_percent / 100) - height = int(img.shape[0] * scale_percent / 100) - dim = (width, height) + self.net = cv2.dnn.readNet(os.path.join(os.path.dirname(__file__),"yolov4.weights"),os.path.join(os.path.dirname(__file__),"yolov4.cfg")) + #self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) + #self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) + self.layer_names = self.net.getLayerNames() + self.outputlayers = [self.layer_names[i[0] - 1] for i in self.net.getUnconnectedOutLayers()] - img = cv2.resize(img, dim) + print("Classifier Initiated") + def tagLayer(self, imgs): # get the results from the net - boxes, scores, classes, num = self.odapi.process_frame(img) - res = False - for i in range(len(boxes)): - # Class 1 represents human - # draw recogniction boxes and return resulting image + true/false - if classes[i] == 1: - if scores[i] > self.threshold: - box = boxes[i] - cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (255, 0, 0), 2) - res = True - return img, res + + results = [] + for i, contours in enumerate(imgs[19:20]): + #print(i) + for contour in contours: + height,width,channels = contour.shape + + dim = max(height, width) + if dim > 320: + img2 = np.zeros(shape=[dim, dim, 3], dtype=np.uint8) else: - res = False - return img, res - - - def tagLayers(self, layers): - print("tagging") - # Detector API can be changed out given the I/O remains the same - # this way you can use a different N-Net if you like to - class DetectorAPI: - def __init__(self, path_to_ckpt): - self.path_to_ckpt = path_to_ckpt - - self.detection_graph = tf.Graph() - with self.detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(self.path_to_ckpt, 'rb') as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - - self.default_graph = self.detection_graph.as_default() - self.sess = tf.Session(graph=self.detection_graph) - - # Definite input and output Tensors for detection_graph - self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0') - # Each box represents a part of the image where a particular object was detected. 
- self.detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0') - # Each score represent how level of confidence for each of the objects. - # Score is shown on the result image, together with the class label. - self.detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0') - self.detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0') - self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0') - - def process_frame(self, image): - # Expand dimensions since the trained_model expects images to have shape: [1, None, None, 3] - image_np_expanded = np.expand_dims(image, axis=0) - # Actual detection. - - (boxes, scores, classes, num) = self.sess.run( - [self.detection_boxes, self.detection_scores, self.detection_classes, self.num_detections], - feed_dict={self.image_tensor: image_np_expanded}) - - im_height, im_width,_ = image.shape - boxes_list = [None for i in range(boxes.shape[1])] - for i in range(boxes.shape[1]): - boxes_list[i] = ( - int(boxes[0, i, 0] * im_height), - int(boxes[0, i, 1] * im_width), - int(boxes[0, i, 2] * im_height), - int(boxes[0, i, 3] * im_width) - ) - - return boxes_list, scores[0].tolist(), [int(x) for x in classes[0].tolist()], int(num[0]) - - def close(self): - self.sess.close() - self.default_graph.close() - + img2 = np.zeros(shape=[320,320, 3], dtype=np.uint8) + img2[:height,:width] = contour + blob = cv2.dnn.blobFromImage(img2,1/256,(320,320),(0,0,0),True,crop=False) #reduce 416 to 320 + self.net.setInput(blob) + outs = self.net.forward(self.outputlayers) + for out in outs: + for detection in out: + scores = detection + class_id = np.argmax(scores) + confidence = scores[class_id] + if confidence > self.threshold: + if self.classes[class_id] not in results: + cv2.imshow("changes x", img2) + cv2.waitKey(10) & 0XFF + results.append(self.classes[class_id]) + #print(self.classes[x], score) + + return results diff --git a/Application/Classifiers/coco_map.json b/Application/Classifiers/coco_map.json new file mode 100644 index 0000000..46fb261 --- /dev/null +++ b/Application/Classifiers/coco_map.json @@ -0,0 +1,185 @@ +[ + { + "name": "/m/01g317" + ,"id": 1 + ,"display_name": "person" + }, + { + "name": "/m/0199g" + ,"id": 2 + ,"display_name": "bicycle" + }, + { + "name": "/m/0k4j" + ,"id": 3 + ,"display_name": "car" + }, + { + "name": "/m/04_sv" + ,"id": 4 + ,"display_name": "motorcycle" + }, + { + "name": "/m/05czz6l" + ,"id": 5 + ,"display_name": "airplane" + }, + { + "name": "/m/01bjv" + ,"id": 6 + ,"display_name": "bus" + }, + { + "name": "/m/07jdr" + ,"id": 7 + ,"display_name": "train" + }, + { + "name": "/m/07r04" + ,"id": 8 + ,"display_name": "truck" + }, + { + "name": "/m/019jd" + ,"id": 9 + ,"display_name": "boat" + }, + { + "name": "/m/015qff" + ,"id": 10 + ,"display_name": "traffic light" + }, + { + "name": "/m/01pns0" + ,"id": 11 + ,"display_name": "fire hydrant" + }, + { + "name": "/m/02pv19" + ,"id": 13 + ,"display_name": "stop sign" + }, + { + "name": "/m/015qbp" + ,"id": 14 + ,"display_name": "parking meter" + }, + { + "name": "/m/0cvnqh" + ,"id": 15 + ,"display_name": "bench" + }, + { + "name": "/m/015p6" + ,"id": 16 + ,"display_name": "bird" + }, + { + "name": "/m/01yrx" + ,"id": 17 + ,"display_name": "cat" + }, + { + "name": "/m/0bt9lr" + ,"id": 18 + ,"display_name": "dog" + }, + { + "name": "/m/03k3r" + ,"id": 19 + ,"display_name": "horse" + }, + { + "name": "/m/07bgp" + ,"id": 20 + ,"display_name": "sheep" + }, + { + "name": "/m/01xq0k1" + ,"id": 21 + 
,"display_name": "cow" + }, + { + "name": "/m/0bwd_0j" + ,"id": 22 + ,"display_name": "elephant" + }, + { + "name": "/m/01dws" + ,"id": 23 + ,"display_name": "bear" + }, + { + "name": "/m/0898b" + ,"id": 24 + ,"display_name": "zebra" + }, + { + "name": "/m/03bk1" + ,"id": 25 + ,"display_name": "giraffe" + }, + { + "name": "/m/01940j" + ,"id": 27 + ,"display_name": "backpack" + }, + { + "name": "/m/0hnnb" + ,"id": 28 + ,"display_name": "umbrella" + }, + { + "name": "/m/080hkjn" + ,"id": 31 + ,"display_name": "handbag" + }, + + { + "name": "/m/01s55n" + ,"id": 33 + ,"display_name": "suitcase" + }, + + { + "name": "/m/03g8mr" + ,"id": 39 + ,"display_name": "baseball bat" + }, + + { + "name": "/m/06_fw" + ,"id": 41 + ,"display_name": "skateboard" + }, + { + "name": "/m/01mzpv" + ,"id": 62 + ,"display_name": "chair" + }, + { + "name": "/m/02crq1" + ,"id": 63 + ,"display_name": "couch" + }, + { + "name": "/m/03ssj5" + ,"id": 65 + ,"display_name": "bed" + }, + { + "name": "/m/04bcr3" + ,"id": 67 + ,"display_name": "dining table" + }, + { + "name": "/m/07c52" + ,"id": 72 + ,"display_name": "tv" + }, + { + "name": "/m/01c648" + ,"id": 73 + ,"display_name": "laptop" + } +] \ No newline at end of file diff --git a/Application/Classifiers/yolov3.cfg b/Application/Classifiers/yolov3.cfg new file mode 100644 index 0000000..938ffff --- /dev/null +++ b/Application/Classifiers/yolov3.cfg @@ -0,0 +1,789 @@ +[net] +# Testing +# batch=1 +# subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=608 +height=608 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + diff --git a/Application/Classifiers/yolov4.cfg b/Application/Classifiers/yolov4.cfg new file mode 100644 index 
0000000..2985a31 --- /dev/null +++ b/Application/Classifiers/yolov4.cfg @@ -0,0 +1,1157 @@ +[net] +batch=64 +subdivisions=8 +# Training +#width=512 +#height=512 +width=608 +height=608 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0013 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 
+stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 54 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1, -16 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=leaky + +[route] +layers = -1, -37 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +max_delta=5 diff --git a/Application/Config.py b/Application/Config.py index 3a4d2bf..e762d0c 100644 --- a/Application/Config.py +++ b/Application/Config.py @@ -12,7 +12,7 @@ class Config: "tolerance": 20, "maxLength": None, "ttolerance": 60, - "videoBufferLength": 128, + "videoBufferLength": 16, "noiseThreashold": 0.1, "noiseSensitivity": 3/4, "LayersPerContour": 5, @@ -20,6 +20,7 @@ class Config: } def __init__(self): + '''This is basically just a wrapper for a json / python dict''' print("Current Config:", self.c) def __getitem__(self, key): diff --git a/Application/ContourExctractor.py b/Application/ContourExctractor.py index b5d52f9..fa6c143 100644 --- a/Application/ContourExctractor.py +++ 
b/Application/ContourExctractor.py @@ -21,8 +21,8 @@ from Application.Config import Config class ContourExtractor: - #X = {frame_number: [(contour, (x,y,w,h)), ...], } - + #extractedContours = {frame_number: [(contour, (x,y,w,h)), ...], } + # dict with frame numbers as keys and the contour bounds of every contour for that frame def getextractedContours(self): return self.extractedContours @@ -44,17 +44,15 @@ class ContourExtractor: print("ContourExtractor initiated") - def extractContours(self): - extractedContours = dict() - videoReader = VideoReader(self.config) - + def extractContours(self): + videoReader = VideoReader(self.config) videoReader.fillBuffer() threads = self.config["videoBufferLength"] self.start = time.time() + # start a pool of worker threads and let them read from the video reader buffer until the video reader reaches EOF with ThreadPool(threads) as pool: while not videoReader.videoEnded(): - #FrameCount, frame = videoReader.pop() if videoReader.buffer.qsize() == 0: time.sleep(.5) @@ -70,6 +68,7 @@ class ContourExtractor: def getContours(self, data): frameCount, frame = data + # wait for the reference frame, which is calculated by averaging some previous frames while frameCount not in self.averages: time.sleep(0.1) firstFrame = self.averages.pop(frameCount, None) @@ -77,6 +76,7 @@ class ContourExtractor: if frameCount % (60*30) == 0: print(f"{frameCount/(60*30)} Minutes processed in {round((time.time() - self.start), 2)} each") self.start = time.time() + gray = self.prepareFrame(frame) frameDelta = cv2.absdiff(gray, firstFrame) thresh = cv2.threshold(frameDelta, self.threashold, 255, cv2.THRESH_BINARY)[1] diff --git a/Application/Exporter.py b/Application/Exporter.py index 8f936ba..e6fb691 100644 --- a/Application/Exporter.py +++ b/Application/Exporter.py @@ -21,7 +21,7 @@ class Exporter: if raw: self.exportRawData(layers) if layered and overlayed: - print("Layered and Individual are mutially exclusive, Individual was choosen automatically") + print("Layered and Individual are mutually exclusive, individual was chosen automatically") overlayed = False if layered and not overlayed: self.exportLayers(layers) diff --git a/Application/Layer.py b/Application/Layer.py index 3899f36..ad7b7b3 100644 --- a/Application/Layer.py +++ b/Application/Layer.py @@ -17,6 +17,16 @@ class Layer: length = None def __init__(self, startFrame, data, config): + '''returns a Layer object + + Layers are collections of contours with a StartFrame, + which is the number of the frame the first contour of + this layer was extracted from + + A Contour is a CV2 Contour, which is a y*x*3 rgb numpy array, + but we only care about the corners of the contours. 
+ So we save the bounds (x,y,w,h) in bounds[] and the actual content in data[] + ''' self.startFrame = startFrame self.lastFrame = startFrame self.config = config @@ -25,22 +35,28 @@ class Layer: self.bounds.append([data]) #print("Layer constructed") - def add(self, frameNumber, data): + def add(self, frameNumber, bound): + '''Adds a bound''' if not self.startFrame + len(self.bounds) < frameNumber: if len(self.bounds[self.startFrame - frameNumber]) >= 1: - self.bounds[self.startFrame - frameNumber].append(data) + self.bounds[self.startFrame - frameNumber].append(bound) else: self.lastFrame = frameNumber - self.bounds.append([data]) + self.bounds.append([bound]) self.getLength() def getLength(self): + return len(self) + + def __len__(self): self.length = len(self.bounds) return self.length def fill(self, inputPath, resizeWidth): - '''reads in the contour data, needed for export''' + '''deprecated + + Fills the data[] array by iterating over the bounds''' cap = cv2.VideoCapture(inputPath) self.data = [None]*len(self.bounds) @@ -57,12 +73,18 @@ cap.release() def clusterDelete(self): + '''Uses a cluster analysis to remove contours which are not the result of movement''' org = self.bounds + if len(org) == 1: + return mapped = [] mapping = [] clusterCount = 1 noiseSensitivity = self.config["noiseSensitivity"] noiseThreashold = self.config["noiseThreashold"] + + # calculates the middle of each contour in the 2d bounds[] and saves it in a 1d list + # and saves the 2d indexes in a mapping array for i, bounds in enumerate(org): for j, bound in enumerate(bounds): x = (bound[0] + bound[2]/2) / self.config["w"] @@ -76,6 +98,7 @@ centers = [] kmeans = None + # the loop wouldn't be necessary if the number of clusters were known; since it isn't, the loop tries to optimize it while True: kmeans = KMeans(init="random", n_clusters=clusterCount, n_init=5, max_iter=300, random_state=42) kmeans.fit(mapped) @@ -96,12 +119,21 @@ centers = kmeans.cluster_centers_ break + # transforms the labels array + # new array: + # the index is the cluster id, each inner list holds the ids of the contours in that cluster + # [ + # [1,2,3] + # [3,4,5] + # [6,7,8,9] + # ] classed = [[]] for i, x in enumerate(list(labels)): while len(classed) <= x: classed.append([]) classed[x].append(i) + # calculates the euclidean distance (without the sqrt) of each point in a cluster to the cluster center dists = [] for num, cen in enumerate(centers): dist = 0 @@ -110,9 +142,10 @@ dist/=len(classed[num]) dists.append(dist*1000) + # copy all contours of the clusters with more movement than the threshold newContours = [[]] for i, dis in enumerate(dists): - # copy contours which are spread out, delete rest by not yopying them + # copy contours which are spread out, delete rest by not copying them if dis > noiseThreashold: for j in classed[i]: x, y = mapping[j] diff --git a/Application/LayerFactory.py b/Application/LayerFactory.py index b1434c6..49c117e 100644 --- a/Application/LayerFactory.py +++ b/Application/LayerFactory.py @@ -23,6 +23,7 @@ class LayerFactory: self.extractLayers(data) def extractLayers(self, data = None): + '''Bundle given contours together into Layer Objects''' if self.data is None: if data is None: print("LayerFactory data was none") @@ -48,8 +49,6 @@ class LayerFactory: #for x in tmp: #self.getLayers(x) - - return self.layers def getLayers(self, data): diff --git a/Application/LayerManager.py b/Application/LayerManager.py index 36ffad7..7b0b82a 100644 --- a/Application/LayerManager.py +++ 
b/Application/LayerManager.py @@ -3,9 +3,10 @@ from Application.Config import Config from Application.VideoReader import VideoReader from Application.Exporter import Exporter from multiprocessing.pool import ThreadPool +from Application.Classifiers.Classifier import Classifier import cv2 import numpy as np - +import time class LayerManager: def __init__(self, config, layers): self.data = {} @@ -17,12 +18,16 @@ class LayerManager: self.resizeWidth = config["resizeWidth"] self.footagePath = config["inputPath"] self.config = config + self.classifier = Classifier() + self.tags = [] print("LayerManager constructed") + + def cleanLayers(self): self.freeMin() self.sortLayers() - self.cleanLayers() + #self.cleanLayers2() self.freeMax() def removeStaticLayers(self): @@ -52,7 +57,7 @@ if l.getLength() > self.minLayerLength: layers.append(l) self.layers = layers - self.removeStaticLayers() + def freeMax(self): layers = [] @@ -60,39 +65,45 @@ if l.getLength() < self.maxLayerLength: layers.append(l) self.layers = layers - self.removeStaticLayers() + - def fillLayers(self): + def tagLayers(self): + '''Use classifiers to tag all Layers by reading the contour content from the original video and then applying the classifier''' + exporter = Exporter(self.config) + start = time.time() + for i, layer in enumerate(self.layers): + print(f"{round(i/len(self.layers)*100,2)} {round((time.time() - start), 2)}") + start = time.time() + if len(layer.bounds[0]) == 0: + continue + listOfFrames = exporter.makeListOfFrames([layer]) - listOfFrames = Exporter(self.config).makeListOfFrames(self.layers) - videoReader = VideoReader(self.config, listOfFrames) - videoReader.fillBuffer() + videoReader = VideoReader(self.config, listOfFrames) + videoReader.fillBuffer() - while not videoReader.videoEnded(): - frameCount, frame = videoReader.pop() - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - for i, layer in enumerate(self.layers): - if i % 20 == 0: - print(f"filled {int(round(i/len(self.layers),2)*100)}% of all Layers") - - if layer.startFrame <= frameCount and layer.startFrame + len(layer.bounds) > frameCount: - data = [] - for (x, y, w, h) in layer.bounds[frameCount - layer.startFrame]: - if x is None: - break - factor = videoReader.w / self.resizeWidth - x = int(x * factor) - y = int(y * factor) - w = int(w * factor) - h = int(h * factor) - data.append(np.copy(frame[y:y+h, x:x+w])) - layer.data.append(data) + while not videoReader.videoEnded(): + frameCount, frame = videoReader.pop() + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + data = [] + for (x, y, w, h) in layer.bounds[frameCount - layer.startFrame]: + if x is None: + break + factor = videoReader.w / self.resizeWidth + x = int(x * factor) + y = int(y * factor) + w = int(w * factor) + h = int(h * factor) + data.append(np.copy(frame[y:y+h, x:x+w])) + layer.data.append(data) + tags = self.classifier.tagLayer(layer.data) + print(tags) + self.tags.append(tags) - videoReader.thread.join() + videoReader.thread.join() def sortLayers(self): self.layers.sort(key = lambda c:c.startFrame) - def cleanLayers(self): + def cleanLayers2(self): for layer in self.layers: layer.clusterDelete() diff --git a/Application/VideoReader.py b/Application/VideoReader.py index 5010ba3..8a766bf 100644 --- a/Application/VideoReader.py +++ b/Application/VideoReader.py @@ -24,11 +24,11 @@ class VideoReader: self.buffer = Queue(config["videoBufferLength"]) self.vc = cv2.VideoCapture(videoPath) self.stopped = False - self.getWH() if setOfFrames is not None: 
self.listOfFrames = sorted(setOfFrames) def getWH(self): + '''get width and height''' res, image = self.vc.read() self.w = image.shape[1] self.h = image.shape[0] @@ -37,15 +37,11 @@ class VideoReader: def pop(self): return self.buffer.get(block=True) - def get(self): - return self.buffer[-1] - def fillBuffer(self): if self.buffer.full(): print("VideoReader::fillBuffer was called when buffer was full.") self.endFrame = int(self.vc.get(cv2.CAP_PROP_FRAME_COUNT)) - #self.endFrame = 10*60*30 if self.listOfFrames is not None: self.thread = threading.Thread(target=self.readFramesByList, args=()) else: @@ -57,6 +53,7 @@ class VideoReader: self.vc.release() def readFrames(self): + '''Reads video from start to finish''' while self.lastFrame < self.endFrame: res, frame = self.vc.read() if res: @@ -64,9 +61,9 @@ class VideoReader: self.lastFrame += 1 self.stopped = True - def readFramesByList(self): + '''Reads all frames from a list of frame numbers''' self.vc.set(1, self.listOfFrames[0]) self.lastFrame = self.listOfFrames[0] self.endFrame = self.listOfFrames[-1] @@ -76,6 +73,8 @@ class VideoReader: res, frame = self.vc.read() if res: self.buffer.put((self.lastFrame, frame)) + else: + print("READING FRAMES IS FALSE") # since the list is sorted the first element is always the lowest relevant framenumber # [0,1,2,3,32,33,34,35,67,68,69] self.listOfFrames.pop(0) diff --git a/main.py b/main.py index 57e5313..445fb70 100644 --- a/main.py +++ b/main.py @@ -9,44 +9,39 @@ from Application.Importer import Importer from Application.VideoReader import VideoReader from Application.LayerManager import LayerManager from Application.Classifiers import * -#TODO -# finden von relevanten Stellen anhand von zu findenen metriken für vergleichsbilder -def demo(): - print("startup") +def main(): start = time.time() config = Config() - config["inputPath"] = os.path.join(os.path.dirname(__file__), "generate test footage/3.mp4") - config["importPath"] = os.path.join(os.path.dirname(__file__), "output/short.txt") + config["inputPath"] = os.path.join(os.path.dirname(__file__), "generate test footage/out.mp4") + #config["importPath"] = os.path.join(os.path.dirname(__file__), "output/short.txt") config["outputPath"] = os.path.join(os.path.dirname(__file__), "output/short.mp4") vr = VideoReader(config) config["w"], config["h"] = vr.getWH() if config["importPath"] is None: - #ana = Analyzer(config) - #ref = ana.avg contours = ContourExtractor(config).extractContours() print("Time consumed extracting: ", time.time() - start) layerFactory = LayerFactory(config) layers = layerFactory.extractLayers(contours) - layerManager = LayerManager(config, layers) - layerManager.cleanLayers() - layers = layerManager.layers + else: layers = Importer(config).importRawData() + layerManager = LayerManager(config, layers) + layerManager.cleanLayers() + + #layerManager.tagLayers() + layers = layerManager.layers exporter = Exporter(config) - exporter.export(layers) + exporter.export(layers, raw=False) print("Total time: ", time.time() - start) -def init(): - print("not needed yet") - if __name__ == "__main__": - demo() + main()
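
For reference, a minimal sketch of the standard OpenCV-DNN YOLO post-processing pattern that the new tagLayer loop is built around: pad the crop onto a square canvas, build a blob, run a forward pass, and keep the best class score per detection row. This is an illustration under assumptions, not code from this patch: the cfg/weights paths, the 1/255 scale factor, the confidence threshold, and the class_names argument (e.g. the 80 COCO names in darknet order, loaded separately) are hypothetical.

import cv2
import numpy as np

def detect_classes(image, cfg="yolov4.cfg", weights="yolov4.weights",
                   class_names=None, conf_threshold=0.5, input_size=320):
    '''Return the set of class names (or raw class ids) detected in one BGR image.'''
    net = cv2.dnn.readNet(weights, cfg)
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns [[i], ...] on older OpenCV builds and [i, ...] on newer ones
    out_layers = [layer_names[int(np.asarray(i).flatten()[0]) - 1]
                  for i in net.getUnconnectedOutLayers()]

    # pad the (possibly small) crop onto a square canvas so its aspect ratio is preserved
    h, w = image.shape[:2]
    side = max(h, w, input_size)
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:h, :w] = image

    blob = cv2.dnn.blobFromImage(canvas, 1 / 255.0, (input_size, input_size),
                                 (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    found = set()
    for out in net.forward(out_layers):
        for detection in out:
            # each row is [cx, cy, w, h, objectness, class scores...]
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            if scores[class_id] > conf_threshold:
                found.add(class_names[class_id] if class_names else class_id)
    return found

Called once per contour crop, this returns the label set for that crop; constructing the net a single time and reusing it across calls, as the patch does in Classifier.__init__, avoids re-reading the weights for every crop.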