Computer Vision : Smile Detection in Images
While juggling between binge-watching Money Heist and coding, here is what I came up with: "Smile Detection in Images". While this doesn't sound as sophisticated as detecting whether or not a person is wearing a mask, it can be thought of as a good start. Pick simple cases first, build up the complexity, learn new concepts, and create complex apps. That's how it goes.
As they say, "Do not bite off more than you can chew"...
What I did:
- Read an image
- Detect the face using DLib (Check out the post Computer Vision : Face Detection using DLIB for more info on face detection)
- Compute the Landmarks
- Pick the landmarks of choice (In our case "The mouth" and "The Jaw")
- Perform smile detection using simple math
I haven't used ML/DL for this, and hence it is not super accurate. Stay tuned for more accurate examples.
This can easily be extended to apply the same logic to videos, including YouTube videos. Sample code is provided at the end.
import dlib,cv2
import numpy as np
import math
################################################################
# Method: #
# 1. To detect the mouth in an image and draw the landmarks. #
# 2. Detect whether or not the person in the image is smiling #
# Note: #
# This needs a lot of refinement to increase the accuracy. #
# It purely relies on math instead of using #
import numpy as np
import math
################################################################
# Method: #
# 1. To detect the mouth in an image and draw the landmarks. #
# 2. Detect whether or not the person in the image is smiling #
# Note: #
# This needs a lot of refinement to increase the accuracy. #
# It purely relies on math instead of using #
# Deep Learning or Machine Learning libraries #
################################################################
def renderSmile(im, landmarks, ratio, color=(0, 0, 0), radius=3, threshold=45.0):
    """Draw the mouth landmarks on ``im`` and label it as smiling or not.

    Args:
        im: Image to annotate. It is modified in place by the cv2 drawing
            calls.
        landmarks: Landmark result exposing ``part(i)`` with ``x`` and ``y``
            attributes. Points 48 through 67 are drawn.
        ratio: Mouth-to-jaw distance ratio, as a percentage, computed by the
            caller.
        color: Colour tuple used for the landmark dots.
        radius: Radius of each landmark dot, in pixels.
        threshold: ``ratio`` values strictly greater than this are labelled
            as smiling. Defaults to 45.0, the value that used to be
            hard-coded.

    Returns:
        None. The result is the annotation drawn onto ``im``.
    """
    # Filled dots (thickness -1) on every landmark in the 48..67 range.
    for idx in range(48, 68):
        part = landmarks.part(idx)
        cv2.circle(im, (part.x, part.y), radius, color, -1)

    # The label uses its own fixed colour; keep it separate from the ``color``
    # parameter instead of overwriting the caller's argument.
    text_color = (60, 20, 220)
    label = 'Smiling <3' if ratio > threshold else 'Not Smiling'
    cv2.putText(im, label, (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color,
                2, cv2.LINE_AA)
PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
faceDetector = dlib.get_frontal_face_detector()
landmarkDetector = dlib.shape_predictor(PREDICTOR_PATH)

imageFilename = "JuliaRoberts_NS.jpg"
#imageFilename = "JuliaRoberts.png"
im = cv2.imread(imageFilename)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; stop with a clear message rather than failing later on
# ``im.shape``.
if im is None:
    raise SystemExit("[ERROR] Could not read image: {}".format(imageFilename))

###############################################################
# Resizing image to a smaller window for faster face detection#
###############################################################
# Both axes are scaled by 450 / height, so the resized image is 450 px tall.
height = im.shape[0]
IMAGE_RESIZE = float(height) / 450
im = cv2.resize(im, None, fx=1.0 / IMAGE_RESIZE, fy=1.0 / IMAGE_RESIZE,
                interpolation=cv2.INTER_LINEAR)

###############################################################
# Detect Faces #
###############################################################
faceRects = faceDetector(im, 1)
print("Number of faces detected: ",len(faceRects))

#################################################################
# Loop over the detected faces and compute the landmarks needed #
# Mouth Start : Landmark 48 #
# Mouth End : Landmark 54 #
# Jaw Start : Landmark 0 #
# Jaw End : Landmark 16 #
#################################################################
for faceRect in faceRects:
    landmarks = landmarkDetector(im, faceRect)

    # Straight-line distance between the two mouth end points.
    mouth_start, mouth_end = landmarks.part(48), landmarks.part(54)
    mouth_distance = math.hypot(mouth_end.x - mouth_start.x,
                                mouth_end.y - mouth_start.y)

    # Straight-line distance between the two jaw end points.
    jaw_start, jaw_end = landmarks.part(0), landmarks.part(16)
    jaw_distance = math.hypot(jaw_end.x - jaw_start.x,
                              jaw_end.y - jaw_start.y)

    # Degenerate landmarks (both jaw points identical) would divide by zero;
    # skip such a face instead of crashing.
    if jaw_distance == 0:
        continue

    # Mouth width expressed as a percentage of jaw width.
    ratio = (mouth_distance / jaw_distance) * 100
    print("Ratio", ratio)
    renderSmile(im, landmarks, ratio)

#outputFileName = "familyLandmarks.jpg"
#print("Saving output image to", outputFileName)
#cv2.imwrite(outputFileName, im)
cv2.imshow("Facial Landmark detector", im)
cv2.waitKey(0)
cv2.destroyAllWindows()
################################################################
def renderSmile(im, landmarks, ratio, color=(0, 0, 0), radius=3, threshold=45.0):
    """Draw the mouth landmarks on ``im`` and label it as smiling or not.

    Args:
        im: Image to annotate. It is modified in place by the cv2 drawing
            calls.
        landmarks: Landmark result exposing ``part(i)`` with ``x`` and ``y``
            attributes. Points 48 through 67 are drawn.
        ratio: Mouth-to-jaw distance ratio, as a percentage, computed by the
            caller.
        color: Colour tuple used for the landmark dots.
        radius: Radius of each landmark dot, in pixels.
        threshold: ``ratio`` values strictly greater than this are labelled
            as smiling. Defaults to 45.0, the value that used to be
            hard-coded.

    Returns:
        None. The result is the annotation drawn onto ``im``.
    """
    # Filled dots (thickness -1) on every landmark in the 48..67 range.
    for idx in range(48, 68):
        part = landmarks.part(idx)
        cv2.circle(im, (part.x, part.y), radius, color, -1)

    # The label uses its own fixed colour; keep it separate from the ``color``
    # parameter instead of overwriting the caller's argument.
    text_color = (60, 20, 220)
    label = 'Smiling <3' if ratio > threshold else 'Not Smiling'
    cv2.putText(im, label, (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color,
                2, cv2.LINE_AA)
PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
faceDetector = dlib.get_frontal_face_detector()
landmarkDetector = dlib.shape_predictor(PREDICTOR_PATH)

imageFilename = "JuliaRoberts_NS.jpg"
#imageFilename = "JuliaRoberts.png"
im = cv2.imread(imageFilename)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; stop with a clear message rather than failing later on
# ``im.shape``.
if im is None:
    raise SystemExit("[ERROR] Could not read image: {}".format(imageFilename))

###############################################################
# Resizing image to a smaller window for faster face detection#
###############################################################
# Both axes are scaled by 450 / height, so the resized image is 450 px tall.
height = im.shape[0]
IMAGE_RESIZE = float(height) / 450
im = cv2.resize(im, None, fx=1.0 / IMAGE_RESIZE, fy=1.0 / IMAGE_RESIZE,
                interpolation=cv2.INTER_LINEAR)

###############################################################
# Detect Faces #
###############################################################
faceRects = faceDetector(im, 1)
print("Number of faces detected: ",len(faceRects))

#################################################################
# Loop over the detected faces and compute the landmarks needed #
# Mouth Start : Landmark 48 #
# Mouth End : Landmark 54 #
# Jaw Start : Landmark 0 #
# Jaw End : Landmark 16 #
#################################################################
for faceRect in faceRects:
    landmarks = landmarkDetector(im, faceRect)

    # Straight-line distance between the two mouth end points.
    mouth_start, mouth_end = landmarks.part(48), landmarks.part(54)
    mouth_distance = math.hypot(mouth_end.x - mouth_start.x,
                                mouth_end.y - mouth_start.y)

    # Straight-line distance between the two jaw end points.
    jaw_start, jaw_end = landmarks.part(0), landmarks.part(16)
    jaw_distance = math.hypot(jaw_end.x - jaw_start.x,
                              jaw_end.y - jaw_start.y)

    # Degenerate landmarks (both jaw points identical) would divide by zero;
    # skip such a face instead of crashing.
    if jaw_distance == 0:
        continue

    # Mouth width expressed as a percentage of jaw width.
    ratio = (mouth_distance / jaw_distance) * 100
    print("Ratio", ratio)
    renderSmile(im, landmarks, ratio)

#outputFileName = "familyLandmarks.jpg"
#print("Saving output image to", outputFileName)
#cv2.imwrite(outputFileName, im)
cv2.imshow("Facial Landmark detector", im)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Note:
I am a big fan of Julia Roberts <3
Code to apply the same logic to a YouTube video:
import pafy
url = 'https://youtu.be/aW8BDgLpZkI'
vPafy = pafy.new(url)
play = vPafy.getbest(preftype="mp4")

frame_number = 0
smile_frames = []

# Initializing video capture object.
capture = cv2.VideoCapture(play.url)
if not capture.isOpened():
    # Nothing can be read from an unopened capture, so report the problem and
    # skip processing instead of creating an empty output file.
    print("[ERROR] Video not opened properly")
    capture.release()
else:
    # Create a VideoWriter object with the same frame size as the source.
    # Output is written as MJPG at a fixed 15 frames per second.
    smileDetectionOut = cv2.VideoWriter(
        "smileDetectionYoutubeOutput.avi",
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
         int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    try:
        while True:
            # grab the next frame
            isGrabbed, frame = capture.read()
            if not isGrabbed:
                break

            # The detector is given an RGB copy; the original BGR frame is
            # what gets annotated and written out.
            imDlib = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): smile_detector is not defined in this listing;
            # it is assumed to return a truthy value when the frame contains
            # a smile. Confirm against its definition.
            frame_has_smile = smile_detector(imDlib)
            if frame_has_smile:
                cv2.putText(frame, "Smiling :)", (20, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2,
                            cv2.LINE_AA)
                smile_frames.append(frame_number)

            # Progress report every 50 frames.
            if frame_number % 50 == 0:
                print('\nProcessed {} frames'.format(frame_number))
                print("Smile detected in Frames: {}".format(smile_frames))

            # Write to VideoWriter
            smileDetectionOut.write(frame)
            frame_number += 1
    finally:
        # Release both handles even if frame processing raises, so the
        # output file is closed properly.
        capture.release()
        smileDetectionOut.release()
url = 'https://youtu.be/aW8BDgLpZkI'
vPafy = pafy.new(url)
play = vPafy.getbest(preftype="mp4")

frame_number = 0
smile_frames = []

# Initializing video capture object.
capture = cv2.VideoCapture(play.url)
if not capture.isOpened():
    # Nothing can be read from an unopened capture, so report the problem and
    # skip processing instead of creating an empty output file.
    print("[ERROR] Video not opened properly")
    capture.release()
else:
    # Create a VideoWriter object with the same frame size as the source.
    # Output is written as MJPG at a fixed 15 frames per second.
    smileDetectionOut = cv2.VideoWriter(
        "smileDetectionYoutubeOutput.avi",
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
         int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    try:
        while True:
            # grab the next frame
            isGrabbed, frame = capture.read()
            if not isGrabbed:
                break

            # The detector is given an RGB copy; the original BGR frame is
            # what gets annotated and written out.
            imDlib = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): smile_detector is not defined in this listing;
            # it is assumed to return a truthy value when the frame contains
            # a smile. Confirm against its definition.
            frame_has_smile = smile_detector(imDlib)
            if frame_has_smile:
                cv2.putText(frame, "Smiling :)", (20, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2,
                            cv2.LINE_AA)
                smile_frames.append(frame_number)

            # Progress report every 50 frames.
            if frame_number % 50 == 0:
                print('\nProcessed {} frames'.format(frame_number))
                print("Smile detected in Frames: {}".format(smile_frames))

            # Write to VideoWriter
            smileDetectionOut.write(frame)
            frame_number += 1
    finally:
        # Release both handles even if frame processing raises, so the
        # output file is closed properly.
        capture.release()
        smileDetectionOut.release()
Comments
Post a Comment
Hey there, feel free to leave a comment.