Menemukan pelanggaran dalam video menggunakan computer vision

Anggap saja pelanggaran ini mungkin terjadi. Bagaimana cara mengidentifikasinya?

Kami memiliki rekaman video dari kamera pengawas di tempat kerja karyawan serta log operasi.

Kami akan mencari semua momen dalam rekaman di mana klien tidak hadir. Jaringan saraf MobileNet dan pelacak CSRT dari pustaka OpenCV akan membantu kami dalam hal ini. Untuk kenyamanan, kami juga menggunakan Tesseract-OCR.

Untuk menemukan seseorang dalam bingkai, kami akan menggunakan jaringan saraf MobileNet. Jaringan ini dapat mendeteksi dan melokalisasi 20 jenis objek dalam gambar. Agar berfungsi, Anda perlu mengunduh dua file: arsitektur dan bobot. File-file ini dapat ditemukan di repositori GitHub.

Sebelum menulis kode, kita perlu menginstal pustaka computer vision cv2 dan paket pytesseract untuk memproses teks pada gambar.

!pip install opencv-python
!pip install pytesseract

Agar pytesseract berfungsi, Anda harus mengunduh distribusi Tesseract-OCR dari situs resminya terlebih dahulu dan menginstalnya.

Memulai persiapan untuk pemrosesan video

Kami mengimpor paket dan menulis jalur ke folder Tesseract-OCR di lingkungan lokal:

import os

video_path = ... # placeholder: set this to the path of the video file to analyse
tesseract_path = ... # placeholder: set this to the folder of the local Tesseract-OCR installation
# Append the Tesseract folder to PATH; presumably this lets pytesseract locate
# the tesseract executable (the pytesseract import follows this statement).
os.environ["PATH"] += os.pathsep + tesseract_path

import pytesseract
import cv2
import imutils
import pandas as pd
import datetime as dt

, . , / :

# Per-frame result table. The two column labels are reproduced exactly as they
# appear in the original; the processing loop stores [date, tracked] in them.
df = pd.DataFrame(columns=['', '  '])

# State shared with the processing loop.
tracked = False       # True while a person is being followed inside the client area
date = None           # timestamp of the current frame; unset until read from the video
work_place = tuple()  # client area as (x, y, w, h); empty until the user selects it

, . , :

# Files describing the MobileNet-SSD detector: the network architecture
# (.prototxt) and the trained weights (.caffemodel).
prototxt, weights = (
    'MobileNetSSD_deploy.prototxt',
    'MobileNetSSD_deploy.caffemodel',
)

Jaringan ini mengenali 20 kelas objek berikut (ditambah kelas latar belakang):

# Class id -> label for the detector output. Ids follow the position in this
# tuple: 0 is the background class, followed by the 20 object classes
# (15 is 'person', the only class the processing loop looks for).
classNames = dict(enumerate((
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)))

, .

thr = 0.1 # minimum detection confidence; detections at or below it are ignored

net = cv2.dnn.readNetFromCaffe(prototxt, weights) # load the Caffe model from the architecture and weights files

Buka berkas video dengan cv2.VideoCapture:

# Open the video file; frames are pulled from this capture with cap.read().
cap = cv2.VideoCapture(video_path)

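Untuk mengetahui fps, kita membaca video bingkai demi bingkai dengan .read() dan menghitung jumlah bingkainya, lalu membaginya dengan durasi video: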

%%time

# Count the frames by decoding the whole video once. A dedicated capture is
# used for this pass because reading to the end exhausts it; reusing that
# capture afterwards would make the very next read() fail.
counting_cap = cv2.VideoCapture(video_path)

total_frame = 0
while True:
    success, _ = counting_cap.read()
    if not success:
        # No more frames (or the file could not be read).
        break
    total_frame += 1

counting_cap.release()

# (Re)open the video so that `cap` starts at the first frame for the
# processing loop that follows.
cap = cv2.VideoCapture(video_path)

video_length = ... # placeholder: duration of the video in seconds
# Frames per second, rounded to an integer; the processing loop advances the
# timestamp by one second every `fps` frames.
fps = round(total_frame / video_length)
fps

, . 100- 2 .

, , , . , , .

# Main processing loop.
#
# For every frame it:
#   1. asks the user (once) to mark the client area and the on-screen date,
#   2. advances the timestamp by one second every `fps` frames,
#   3. either runs the MobileNet-SSD detector (when nobody is tracked, or on
#      every (fps * 30)-th frame) or updates the CSRT tracker,
#   4. records [date, tracked] for the frame in `df`.
#
# Reads: cap, fps, net, thr, df, work_place, date, tracked.
# Writes: work_place, date, tracked, df (one row per processed frame).
# Stops at the end of the video or when ESC is pressed.
while cap.isOpened():

    ret, frame = cap.read()
    if not ret:
        # End of the video (or a read error): nothing left to process.
        break

    # Work on a fixed display width so that the ROI coordinates chosen on the
    # first frame stay valid for all later frames.
    frame = imutils.resize(frame, width=1200)

    # Index reported by the capture after this read (same property as the
    # literal 1 used with cap.get()).
    frame_no = cap.get(cv2.CAP_PROP_POS_FRAMES)

    # Ask once for the client area; it is empty until the user selects it.
    if len(work_place) == 0:
        cv2.putText(frame, 'Set the client\'s location', (0, 90), cv2.FONT_HERSHEY_SIMPLEX,
                    2, (0, 255, 0), 2)
        work_place = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
        x, y, w, h = [int(coord) for coord in work_place]

    # Read the initial timestamp once: OCR on a user-selected region, with
    # manual entry as the fallback.
    if not date:
        try:
            cv2.putText(frame, 'Set the date', (0, 160), cv2.FONT_HERSHEY_SIMPLEX,
                        2, (0, 255, 0), 2)
            date = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
            date_x, date_y, date_w, date_h = [int(coord) for coord in date]
            date_ = frame[date_y: date_y + date_h, date_x: date_x + date_w]
            # Grayscale + fixed binary threshold (180) before OCR.
            date_ = cv2.cvtColor(date_, cv2.COLOR_BGR2GRAY)
            date_ = cv2.threshold(date_, 180, 255, cv2.THRESH_BINARY)[1]
            # OCR output carries trailing whitespace, which strptime rejects
            # as unconverted data; strip it before parsing.
            date = pytesseract.image_to_string(date_).strip()
            date = dt.datetime.strptime(date, '%Y-%m-%d %H:%M:%S')

        except Exception:
            # OCR or parsing failed: ask for the timestamp on stdin. The
            # expected format is '%Y-%m-%d %H:%M:%S'.
            # NOTE(review): the prompt text below appears to have lost its
            # wording; it is kept unchanged.
            print('   ,      -- ::')
            date_ = input().strip()
            date = dt.datetime.strptime(date_, '%Y-%m-%d %H:%M:%S')

    # One second of video has elapsed every `fps` frames.
    if frame_no % fps == 0:
        date += dt.timedelta(seconds=1)

    if not tracked or (frame_no % (fps * 30) == 0):

        # Detection pass: the network expects a 300x300 input blob.
        frame_resized = cv2.resize(frame, (300, 300))
        blob = cv2.dnn.blobFromImage(frame_resized, 0.007843,
                                     (300, 300), (127.5, 127.5, 127.5), False)

        net.setInput(blob)
        detections = net.forward()
        # detections layout:
        # [0, 0, object, [0, class_id, confidence, xLeftBottom, yLeftBottom, xRightTop, yRightTop]]

        # Size of the network input, used to turn relative box coordinates
        # into pixels.
        cols = frame_resized.shape[1]
        rows = frame_resized.shape[0]

        for obj in detections[0, 0, :, :]:
            confidence = obj[2]
            if confidence > thr:

                class_id = int(obj[1])
                if class_id == 15:  # 'person'

                    # Box corners in 300x300 pixel coordinates.
                    xLeftBottom = int(obj[3] * cols)
                    yLeftBottom = int(obj[4] * rows)
                    xRightTop = int(obj[5] * cols)
                    yRightTop = int(obj[6] * rows)

                    # Scale factors from the 300x300 input back to the frame.
                    heightFactor = frame.shape[0] / 300.0
                    widthFactor = frame.shape[1] / 300.0

                    # Box corners in frame coordinates.
                    xLeftBottom = int(widthFactor * xLeftBottom)
                    yLeftBottom = int(heightFactor * yLeftBottom)
                    xRightTop = int(widthFactor * xRightTop)
                    yRightTop = int(heightFactor * yRightTop)

                    # Centre of the detected box.
                    xCenter = xLeftBottom + (xRightTop - xLeftBottom) / 2
                    yCenter = yLeftBottom + (yRightTop - yLeftBottom) / 2

                    # A person counts as the client only when the box centre
                    # lies strictly inside the selected client area.
                    if xCenter < x + w and yCenter < y + h and xCenter > x and yCenter > y:
                        # Start a fresh tracker on this person and stop
                        # looking at further detections.
                        tracker = cv2.TrackerCSRT_create()
                        tracker.init(frame, (xLeftBottom, yLeftBottom,
                                             xRightTop - xLeftBottom, yRightTop - yLeftBottom))
                        tracked = True
                        cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                                      (0, 255, 0), 3, 1)
                        break
                    else:
                        tracked = False
    else:
        # Tracking pass: follow the previously detected person.
        _, bbox = tracker.update(frame)
        X, Y, W, H = [int(coord) for coord in bbox]

        xCenter = X + W / 2
        yCenter = Y + H / 2

        # The client is present only while the tracked box centre stays
        # strictly inside the client area.
        if xCenter < x + w and yCenter < y + h and xCenter > x and yCenter > y:
            tracked = True
            cv2.rectangle(frame, (X, Y), (X + W, Y + H), (255, 255, 0), 3, 1)
        else:
            tracked = False

    cv2.imshow('frame', frame)
    df.loc[frame_no, :] = [date, tracked]
    print(frame_no, date, tracked)  # frame number, timestamp, client present/absent
    if cv2.waitKey(1) == 27:  # ESC
        break

cap.release()
cv2.destroyAllWindows()

.read() : , , – . , , , .

. , «-- ::». , : , . date

.

Tesseract-

, . , , .

.get() 1 , , fps

, date

. tesseract, , .

: cv2.resize() cv2.dnn.blobFromImage(). , . , detections

. , 20 .

, 15. , , tracked

True . tracked

date

df

.

, . , tracked

True False, .

. , , . .

, / . , , . , .

# Collapse the per-frame rows to one row per value of the first column (the
# column the loop fills with the timestamp), keeping the maximum of the other
# column, then export the result to an Excel file.
# The aggregation is named by string: passing the builtin `max` is deprecated
# in recent pandas versions, while 'max' selects the same reduction.
df_ = df.groupby('', as_index=False).agg('max')
df_.to_excel('output.xlsx', index=False)

, :

. , . opencv. , , .
. .
. , , . «» , .

Masalah pertama dan kedua dapat diselesaikan dengan pelacak berbasis deep learning, misalnya pelacak GOTURN. Pelacak ini tersedia di pustaka OpenCV, tetapi untuk menjalankannya Anda perlu mengunduh file tambahan. Anda juga dapat menggunakan pelacak populer Re3 atau pelacak AcurusTrack yang baru-baru ini diperkenalkan. Masalah ketiga dapat diselesaikan dengan mengganti jaringan saraf dan/atau melatihnya kembali pada gambar orang yang sedang duduk.

Tautan ke kode .

Menemukan pelanggaran dalam video menggunakan computer vision

More articles: