day 20 face recognition embedding

16 minute read

Embedding

고차원 정보를 저차원으로 변환하면서 필요한 정보를 보존하는 것이 임베딩입니다. 얼굴 이미지가 저차원으로 변환되고, 이 변환된 벡터에 누군지 알아볼 수 있는 정보가 남는다고 가정하면 임베딩 벡터만으로도 얼굴을 비교할 수 있게 됩니다.

임베딩 기법을 활용하여 표현할 수 있는 데이터 유형은

텍스트(단어, 문장, 전체 문서), 이미지, 오디오 등과 같은 구조화되지 않은 데이터

사용자가 시청한 영화 목록 및 사용자 ID와 같이 상호 작용 컨텍스트만 있고 입력 특성이 없는 항목

그래프 및 네트워크와 같은 복잡한 구조 데이터. 예: 소셜 네트워크 및 생화학 화합물

텍스트 설명을 사용한 이미지 검색 및 이미지 캡션 작성과 같은 다중 모달 변환

위치 및 직업과 같은 희소 특성(이를 밀집 특성으로 변환)

인구통계, 사회, 금융, 행동 속성이 300개 이상 포함된 고객 레코드와 같은 고차원 항목(이러한 항목을 보다 간결한 표현으로 변환)

등이 있습니다.

이렇게 만든 임베딩은 유사성 분석, 검색, 전이 학습(transfer learning) 등에 활용할 수 있습니다.

얼굴 임베딩 만들기(1) 얼굴 인식

image

Face Recognition GitHub Repository

import os

# Build the image directory under the user's home directory.
# os.path.expanduser('~') still resolves when the HOME variable is unset,
# whereas os.getenv('HOME') would return None and make the string
# concatenation raise TypeError.
dir_path = os.path.join(os.path.expanduser('~'), 'aiffel', 'face_embedding', 'images')
file_list = os.listdir(dir_path)

print("file_list: {}".format(file_list))

import matplotlib.pyplot as plt
import matplotlib.image as img

# Create a 2x3 grid of axes; the figure size is set here.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(24, 10))

# Flatten the axes grid and pair each axis with one file name. zip() stops at
# the shorter sequence, so a directory with fewer than six entries no longer
# raises IndexError.
for ax, file_name in zip(axes.flatten(), file_list):
    image = img.imread(os.path.join(dir_path, file_name))
    ax.imshow(image)

# Adjust the layout before showing the figure so the adjustment is part of
# what gets displayed.
fig.tight_layout()
plt.show()
file_list: ['clinton.jpeg', 'obama.jpg', 'biden.jpg', 'trump.jpg', 'reagan.jpg', 'bush.jpeg']

png

import face_recognition
import os
%matplotlib inline
import matplotlib.pyplot as plt

# Resolve the sample image under the user's home directory.
# os.path.expanduser('~') also works when HOME is unset, where
# os.getenv('HOME') + '...' would raise TypeError.
image_path = os.path.join(
    os.path.expanduser('~'), 'aiffel', 'face_embedding', 'images', 'obama.jpg')
image = face_recognition.load_image_file(image_path)
face_locations = face_recognition.face_locations(image)

print(face_locations)  # Print the coordinates of the face regions found in the image.

# face_recognition reports each box as (top, right, bottom, left), so rows are
# sliced with a:c and columns with d:b.
a, b, c, d = face_locations[0]
cropped_face = image[a:c, d:b, :]

plt.imshow(cropped_face)   # Draw cropped_face, the face region cut out of the image.
[(98, 758, 284, 572)]





<matplotlib.image.AxesImage at 0x7f8c0ed484d0>

png


import face_recognition
import os

def get_cropped_face(image_file):
    """Return the first detected face of an image file as a cropped array.

    Args:
        image_file: Path of the image, loaded with
            face_recognition.load_image_file.

    Returns:
        The slice of the loaded image covered by the first entry returned by
        face_recognition.face_locations.

    Raises:
        IndexError: If no face is detected in the image.
    """
    image = face_recognition.load_image_file(image_file)
    face_locations = face_recognition.face_locations(image)
    if not face_locations:
        # Same exception type the bare face_locations[0] lookup raised, but
        # with a message that names the offending file.
        raise IndexError("no face detected in {}".format(image_file))

    # face_recognition reports each box as (top, right, bottom, left).
    top, right, bottom, left = face_locations[0]
    return image[top:bottom, left:right, :]

# Resolve the sample image under the user's home directory.
# os.path.expanduser('~') also works when HOME is unset, where
# os.getenv('HOME') + '...' would raise TypeError.
image_path = os.path.join(
    os.path.expanduser('~'), 'aiffel', 'face_embedding', 'images', 'trump.jpg')

# Crop the face with the helper and display it.
cropped_face = get_cropped_face(image_path)
plt.imshow(cropped_face)
<matplotlib.image.AxesImage at 0x7f8c0ecb6b50>

png

얼굴 임베딩 만들기(2) facenet

2015년 구글에서 발표한 FaceNet을 통해 배워보도록 합시다. FaceNet은 우리가 봐왔던 딥러닝 모델과 크게 다른 것은 없지만 네트워크 뒤에 L2 Normalization을 거쳐 임베딩을 만들어 내고 여기에 Triplet Loss를 사용하고 있습니다.

image

import os

# Build the image directory under the user's home directory.
# os.path.expanduser('~') still resolves when the HOME variable is unset,
# whereas os.getenv('HOME') would return None and make the string
# concatenation raise TypeError.
dir_path = os.path.join(os.path.expanduser('~'), 'aiffel', 'face_embedding', 'images')
file_list = os.listdir(dir_path)

print("file_list: {}".format(file_list))
file_list: ['clinton.jpeg', 'obama.jpg', 'biden.jpg', 'trump.jpg', 'reagan.jpg', 'bush.jpeg']
# Path of the sample image inside the image directory.
image_file = os.path.join(dir_path, 'obama.jpg')
face = get_cropped_face(image_file)   # Get the face region (function implemented in the previous step)

def get_face_embedding(face):
    """Compute face embedding vectors from a face region.

    Delegates to face_recognition.face_encodings and returns its result
    unchanged.
    """
    encodings = face_recognition.face_encodings(face)
    return encodings


# Embed the face cropped above; the bare name displays it in a notebook cell.
embedding = get_face_embedding(face)
embedding
[array([-0.08126248,  0.11014761, -0.01082448, -0.05268792,  0.01033717,
        -0.00813808, -0.07251769, -0.0922646 ,  0.19304723, -0.09246384,
         0.23908533,  0.06770287, -0.22123125, -0.14119367,  0.05562152,
         0.13705511, -0.17924425, -0.07232306, -0.11183281, -0.11370818,
         0.03956435, -0.01019115,  0.0942178 ,  0.04493114, -0.12757549,
        -0.34449592, -0.05374938, -0.17703305,  0.00868226, -0.09721592,
        -0.09817187,  0.00278232, -0.17721367, -0.12789807,  0.03489833,
        -0.01991234, -0.00838933, -0.00132862,  0.18308581,  0.02447568,
        -0.1237656 ,  0.09957044,  0.02323568,  0.22983313,  0.2838524 ,
         0.06655717, -0.00558912, -0.09833544,  0.10058986, -0.23476946,
         0.06006292,  0.1341591 ,  0.08373027,  0.03900504,  0.10194533,
        -0.19337648,  0.01717628,  0.08978969, -0.16005114,  0.01892098,
         0.03082444, -0.06051281, -0.04526773,  0.04633342,  0.20678686,
         0.10299795, -0.12437531, -0.0490516 ,  0.12252682, -0.0280379 ,
         0.04286709,  0.01386448, -0.18673278, -0.22230086, -0.23382807,
         0.07662997,  0.3739067 ,  0.18993473, -0.20606737,  0.0244521 ,
        -0.18487695,  0.04949443,  0.09670059, -0.00123947, -0.0688749 ,
        -0.13414779, -0.04103697,  0.06179445,  0.07108597,  0.02471443,
        -0.04194992,  0.22575834, -0.02099216,  0.04838851,  0.02123774,
         0.05604827, -0.14657559, -0.02243515, -0.17425998, -0.06527615,
         0.02526723, -0.04892462,  0.05058879,  0.13397783, -0.23065181,
         0.06054964,  0.02031171, -0.0191126 ,  0.03631671,  0.06983903,
        -0.03496742, -0.03203756,  0.05493437, -0.24500111,  0.24993542,
         0.24442896,  0.04081136,  0.16412377,  0.06019448,  0.0062465 ,
        -0.00932551, -0.02752422, -0.17775527, -0.03611944,  0.04782868,
         0.06031797,  0.06846569,  0.00587987])]
def get_face_embedding_dict(dir_path):
    """Map each image in a directory to the embedding of its first face.

    Args:
        dir_path: Directory whose entries are scanned with os.listdir.

    Returns:
        A dict keyed by file name without its extension, holding the first
        embedding returned by get_face_embedding. Entries that are not
        regular files, and images in which no face or no embedding is found,
        are skipped.
    """
    embedding_dict = {}

    for file_name in os.listdir(dir_path):
        img_path = os.path.join(dir_path, file_name)

        # os.listdir also returns sub-directories (for example a notebook
        # checkpoint directory), which cannot be loaded as images.
        if not os.path.isfile(img_path):
            continue

        try:
            face = get_cropped_face(img_path)
        except IndexError:
            # get_cropped_face indexes face_locations[0], which fails when no
            # face is detected; skip that image instead of aborting the scan.
            continue

        # A get_cropped_face variant may signal "no face" with an empty
        # result instead of raising.
        if len(face) == 0:
            continue

        embedding = get_face_embedding(face)
        # The encoder can still return no embedding for a cropped region.
        if len(embedding) > 0:
            # os.path.splitext(file_name)[0] is the file name without its
            # extension.
            embedding_dict[os.path.splitext(file_name)[0]] = embedding[0]

    return embedding_dict


# Build the name -> embedding dictionary for the images in dir_path.
embedding_dict = get_face_embedding_dict(dir_path)
# Look up one entry; as the last expression of a notebook cell it is displayed.
embedding_dict['trump']
array([-0.15962467,  0.20268655,  0.0323635 ,  0.02439232, -0.06226439,
        0.05140705,  0.07351795, -0.16823348,  0.06962204, -0.05557961,
        0.17270109, -0.05094168, -0.35293093, -0.0316438 ,  0.03792984,
        0.17343847, -0.13260126, -0.15357377, -0.23523533, -0.08442692,
       -0.00134139,  0.03210667, -0.08926154, -0.02670781, -0.09581181,
       -0.25792354, -0.01056999, -0.11071672,  0.03133569, -0.07444921,
        0.05670552, -0.02273796, -0.19461137, -0.04576054,  0.00947582,
        0.01806056, -0.1502624 , -0.07787319,  0.17577608,  0.01933258,
       -0.19744575, -0.03930519,  0.07374467,  0.20196027,  0.1476755 ,
       -0.0141539 ,  0.0172476 , -0.11976205,  0.09462971, -0.24318144,
       -0.02755127,  0.12169892,  0.14837705,  0.17079785,  0.06952555,
       -0.15360466,  0.02005427,  0.08199155, -0.19544823,  0.10653654,
        0.11468476, -0.21902837, -0.07677591, -0.06791042,  0.10587606,
        0.05921567, -0.06115703, -0.11171092,  0.23917492, -0.15791436,
       -0.13834007,  0.01942121,  0.01826206, -0.1869376 , -0.32023543,
        0.01407088,  0.34068239,  0.20081407, -0.19736893, -0.06809217,
       -0.0536031 , -0.03747801,  0.07641605,  0.09667511, -0.04759219,
       -0.09811257, -0.05056169, -0.01899303,  0.24124743, -0.04360487,
        0.00636286,  0.21936756,  0.01126812, -0.04524586, -0.00227514,
        0.02113587, -0.11754515, -0.05986457, -0.08348871, -0.07204333,
       -0.00065423, -0.14080839,  0.03825339,  0.08760153, -0.11946712,
        0.18282008,  0.03533202,  0.01670191, -0.00815773, -0.00964593,
       -0.06726068,  0.01727787,  0.16389623, -0.16469438,  0.25836599,
        0.20883362, -0.0511426 ,  0.0673226 ,  0.11310361,  0.08522274,
       -0.01515304, -0.03709226, -0.22167102, -0.18071674,  0.06958843,
        0.02523582, -0.01741696,  0.03869874])

임베딩 사이 거리 측정

모델에서 거리가 먼 두 얼굴 이미지의 임베딩은 서로 다른 사람일 확률이 높다고 볼 수 있고 가까우면 같은 사람일 확률이 높다고 볼 수 있습니다. 그럼 실제로 한번 비교해볼까요?

image

위의 세 사람의 사진(이미지 출처: pexels.com)을 Face Detection 모델로 미리 잘라 왔습니다. 그리고 이전 스텝에서 사용했던 모델을 사용해서 128차원의 임베딩 벡터를 추출했습니다.

import numpy as np

A = np.array([0.0019173615146428347, 0.17689529061317444, 0.0763588473200798, -0.024574430659413338, -0.13141091167926788, 0.0344821996986866, -0.0374063216149807, -0.07056370377540588, 0.048655178397893906, -0.03414120525121689, 0.22696012258529663, -0.061402369290590286, -0.24329672753810883, -0.039421431720256805, 0.0621466189622879, 0.1220191940665245, -0.1616966724395752, -0.06176016479730606, -0.18894734978675842, -0.06051916256546974, -0.010404378175735474, -0.05918719246983528, 0.02205268107354641, -0.06932859122753143, -0.20260301232337952, -0.2425234317779541, -0.04454419016838074, -0.11400106549263, -0.02022719383239746, -0.15134216845035553, 0.07622595876455307, -0.0323314443230629, -0.1404413878917694, -0.056338660418987274, -0.04520038887858391, -0.026131991297006607, -0.0352761372923851, -0.0679447203874588, 0.1318240910768509, 0.034210119396448135, -0.17475582659244537, 0.13853909075260162, -0.0027398746460676193, 0.227312833070755, 0.3029572069644928, 0.004932125099003315, 0.05853061378002167, -0.07521739602088928, 0.1443275809288025, -0.2340908795595169, 0.030092637985944748, 0.040133409202098846, 0.1672351360321045, 0.05728958174586296, 0.11475440859794617, -0.07548368722200394, 0.040267568081617355, 0.16487033665180206, -0.21067440509796143, 0.036163005977869034, 0.051559075713157654, -0.05994952470064163, 0.029524143785238266, -0.04122130945324898, 0.13074155151844025, 0.1142958477139473, -0.00561982998624444, -0.09740489721298218, 0.18533651530742645, -0.10422169417142868, -0.11409182846546173, 0.02283927984535694, -0.08339140564203262, -0.13673236966133118, -0.3275497853755951, -0.0002689119428396225, 0.2842463254928589, 0.13883619010448456, -0.29149484634399414, -0.07276060432195663, -0.03179163485765457, 0.011192545294761658, 0.03802505508065224, 0.03392524644732475, -0.03972085565328598, -0.12013585865497589, -0.06272879987955093, -0.026893358677625656, 0.2430601865053177, -0.12022019177675247, -0.010466678068041801, 0.20199882984161377, 
0.051095910370349884, -0.13243277370929718, 0.06056740880012512, -0.04802423343062401, -0.12318279594182968, -0.013157366774976254, -0.12076889723539352, -0.07183175534009933, -0.01982908323407173, -0.15032584965229034, -0.026652328670024872, 0.06820419430732727, -0.24668177962303162, 0.1818322390317917, -0.01959969662129879, -0.07208395004272461, -0.0680316612124443, -0.038368165493011475, 0.021410271525382996, 0.06388168036937714, 0.2293335199356079, -0.22541724145412445, 0.19133104383945465, 0.24343697726726532, -0.04034627974033356, 0.07534503191709518, 0.017645064741373062, 0.054646339267492294, -0.046512290835380554, 0.07076910138130188, -0.0960201621055603, -0.12610889971256256, -0.017934376373887062, -0.010262779891490936, 0.01885927841067314, 0.057148948311805725])

B = np.array([-0.08116298168897629, 0.1283080279827118, 0.024102725088596344, -0.03748808428645134, 0.06578215956687927, -0.07137967646121979, -0.10578329861164093, -0.0911930501461029, 0.19589228928089142, -0.09603863954544067, 0.2447616308927536, 0.07736924290657043, -0.17048686742782593, -0.1277867704629898, 0.06390697509050369, 0.12272421270608902, -0.19242052733898163, -0.08341517299413681, -0.11065894365310669, -0.09501136839389801, -0.010332206264138222, -0.008188878186047077, 0.08251037448644638, 0.04358505830168724, -0.1455313265323639, -0.3595622479915619, -0.07877802848815918, -0.18927346169948578, -0.0018955999985337257, -0.06280332803726196, -0.06073163449764252, 0.03181075677275658, -0.15109844505786896, -0.08682074397802353, 0.017340943217277527, -0.020879391580820084, 0.008258359506726265, 0.016738882288336754, 0.16803768277168274, 0.039162665605545044, -0.09613757580518723, 0.06231086328625679, 0.00924085732549429, 0.2418847680091858, 0.26051488518714905, 0.07355985790491104, 0.05239278823137283, -0.08052310347557068, 0.08884726464748383, -0.24261267483234406, 0.05618546903133392, 0.12175332009792328, 0.09056758135557175, 0.04266638681292534, 0.16591356694698334, -0.2005864679813385, 0.01018378883600235, 0.08819808065891266, -0.15550008416175842, 0.0815843716263771, 0.03018287755548954, -0.025435002520680428, -0.06714558601379395, 0.009693139232695103, 0.22243273258209229, 0.13470745086669922, -0.1363328993320465, 0.01635543815791607, 0.18212205171585083, -0.03392908349633217, 0.0398673489689827, 0.0043264636769890785, -0.15493592619895935, -0.2530894875526428, -0.23155181109905243, 0.0678660124540329, 0.31580865383148193, 0.21846994757652283, -0.20842058956623077, 0.012199334800243378, -0.12194785475730896, 0.059383176267147064, 0.0768171101808548, -0.012840969488024712, -0.11975857615470886, -0.11892750859260559, -0.03087366186082363, 0.04432998597621918, 0.09186872839927673, 0.0821407362818718, -0.018520792946219444, 0.1962793618440628, 
-0.0566205158829689, 0.026071354746818542, 0.007139421068131924, 0.02185123600065708, -0.11292634904384613, -0.044381096959114075, -0.18024618923664093, -0.007845945656299591, 0.010368190705776215, -0.07480168342590332, -0.0035089245066046715, 0.09972234815359116, -0.18773995339870453, 0.0474785715341568, 0.025760797783732414, -0.042169712483882904, 0.0014017894864082336, 0.1201503798365593, -0.05088714882731438, -0.02051539719104767, 0.0884844958782196, -0.2176845818758011, 0.25695914030075073, 0.23358485102653503, 0.019985560327768326, 0.17838242650032043, 0.029055196791887283, 0.04518195986747742, -0.044122979044914246, -0.043431997299194336, -0.15906637907028198, -0.07155231386423111, 0.02525237947702408, 0.02502967044711113, 0.04127159342169762, 0.011846683919429779])

C = np.array([-0.0762145072221756, 0.09951083362102509, 0.0012626983225345612, -0.05529194697737694, -0.006535547785460949, -0.012212716042995453, -0.07667708396911621, -0.07388101518154144, 0.18756520748138428, -0.07589773088693619, 0.2424328476190567, 0.06438330560922623, -0.22197730839252472, -0.13409815728664398, 0.046808283776044846, 0.14692817628383636, -0.1844339370727539, -0.051137253642082214, -0.1149090975522995, -0.1297808736562729, 0.040612753480672836, -0.002555673476308584, 0.10426937788724899, 0.026295233517885208, -0.13127824664115906, -0.35947439074516296, -0.048153407871723175, -0.17165206372737885, -0.0002263905480504036, -0.10254599899053574, -0.08338439464569092, 0.014203382655978203, -0.18179851770401, -0.13200539350509644, 0.03813670203089714, -0.012789442203938961, -0.0030085663311183453, -0.007307708729058504, 0.17558619379997253, 0.025768719613552094, -0.12877899408340454, 0.11051110923290253, 0.03616628795862198, 0.22539083659648895, 0.2838597595691681, 0.07483825087547302, -0.0036694444715976715, -0.09967216849327087, 0.11106447875499725, -0.22961333394050598, 0.06397823244333267, 0.12394970655441284, 0.06568531692028046, 0.037825535982847214, 0.09586739540100098, -0.18721607327461243, 0.01674063131213188, 0.10057111084461212, -0.15766742825508118, 0.008397659286856651, 0.039109550416469574, -0.06041106954216957, -0.046033550053834915, 0.031240269541740417, 0.2121172845363617, 0.103468157351017, -0.1224282756447792, -0.05559460073709488, 0.12153220176696777, -0.018480442464351654, 0.039875734597444534, 0.007489997893571854, -0.18950346112251282, -0.20904967188835144, -0.23732705414295197, 0.0895664244890213, 0.3778454661369324, 0.16606193780899048, -0.20442475378513336, 0.018602905794978142, -0.18367978930473328, 0.04945264756679535, 0.08889186382293701, 0.002995049115270376, -0.06196683272719383, -0.13028381764888763, -0.03548961132764816, 0.053789377212524414, 0.08386979252099991, 0.016627438366413116, -0.040179431438446045, 
0.2289249151945114, -0.02149147540330887, 0.05046383664011955, 0.02314644865691662, 0.05424635857343674, -0.1627081036567688, -0.01140156015753746, -0.18031321465969086, -0.06785157322883606, 0.03336677327752113, -0.06467186659574509, 0.0466950424015522, 0.12832939624786377, -0.2377130389213562, 0.06774994730949402, 0.013810726813971996, -0.019034255295991898, 0.04477768391370773, 0.0660984218120575, -0.031004268676042557, -0.03275192156434059, 0.06632497161626816, -0.24120087921619415, 0.2647172510623932, 0.2477877289056778, 0.054315339773893356, 0.17328208684921265, 0.06950142979621887, 0.019016757607460022, -0.01211759727448225, -0.014044362120330334, -0.17701464891433716, -0.03347969055175781, 0.04914966598153114, 0.05660251900553703, 0.0644666999578476, 0.012375651858747005])

print("슝=3")
슝=3

image

# Compute the L2-norm distance from A and from B to C with numpy.linalg.norm.
# Stacking A and B as rows lets a single call reduce along axis 1.
stacked = np.array([A, B])
distances = np.linalg.norm(stacked - C, ord=2, axis=1)
for label, distance in zip("AB", distances):
    print("Distance between {} and C: {}".format(label, distance))
Distance between A and C: 0.8211549091846528
Distance between B and C: 0.34743558135053815
# Using the L1 distance instead of the L2 distance makes the distance values
# larger overall, as this small example shows.
import numpy as np

x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 3, 4, 5, 6])

# Evaluate both norms on the same difference: ord=1 (L1) first, then ord=2 (L2).
for order in (1, 2):
    print(np.linalg.norm(y - x, ord=order))
5.0
2.23606797749979
def get_distance(name1, name2):
    """Return the L2 distance between the stored embeddings of two names.

    Both names are looked up in the module-level embedding_dict.
    """
    difference = embedding_dict[name1] - embedding_dict[name2]
    return np.linalg.norm(difference, ord=2)


# Distance between the 'obama' and 'trump' embeddings.
get_distance('obama', 'trump')
0.8885117374787042

얼굴임베딩 공간의 시각화

고차원 데이터를 저차원으로 바꾼 임베딩처럼 이런 다차원 벡터를 시각화하기 위해서 차원 축소를 하는 방법들이 있습니다. 예를 들어 PCA, T-SNE 등이 있습니다

128D FaceNet LFW Embedding Visualization

PCA와 T-SNE를 이용해 우리가 쉽게 볼 수 있는 형태로 차원을 축소할 수 있었습니다. 조금 더 자세히 알아볼까요?

  • 먼저 PCA 는 주성분 분석이라는 방법으로 Principal Component Analysis 의 준말입니다. 이 방법은 모든 차원의 축에 따른 값의 변화도인 분산(Variance)을 확인한 뒤 그 중 변화가 가장 큰 주요한 축을 남기는 방법입니다.

  • T-SNE 는 고차원 상에서 멀리 떨어진 데이터가 저차원 상에서도 멀리 배치되도록 차원을 축소하는 방식입니다. 먼저 목표하는 차원에 데이터들을 random하게 배치한 후, 각 데이터의 위치를 고차원 상에서의 배치와 비교하면서 조정해 나가는 알고리즘으로 이해하시면 됩니다.

가장 닮은 꼴 얼굴 찾기

  • def get_cropped_face(image_file) : 이미지 파일에서 얼굴 영역을 가져오는 함수

  • def get_face_embedding(face) : 얼굴영역으로부터 얼굴 임베딩 벡터를 구하는 함수

  • def get_face_embedding_dict(dir_path) : 디렉토리 안에 있는 모든 이미지의 임베딩 딕셔너리를 구하는 함수

  • def get_distance(name1, name2) : 두 이미지(사람 이름) 사이의 임베딩 벡터 거리를 구하는 함수

def get_nearest_face(name, top=5):
    """Placeholder for the nearest-face search; does nothing yet.

    The arguments are accepted but ignored, and None is returned.
    """
    return None


get_nearest_face('trump')
def get_sort_key_func(name1):
    """Build a distance function with name1 fixed in advance.

    The returned callable takes name2 at call time and returns
    get_distance(name1, name2).
    """
    return lambda name2: get_distance(name1, name2)

sort_key_func = get_sort_key_func('trump')   
# Calling the generated function as sort_key_func('obama') computes the embedding-vector distance between trump and obama.
sorted(embedding_dict.items(), key=lambda x:sort_key_func(x[0]))
[('trump',
  array([-0.15962467,  0.20268655,  0.0323635 ,  0.02439232, -0.06226439,
          0.05140705,  0.07351795, -0.16823348,  0.06962204, -0.05557961,
          0.17270109, -0.05094168, -0.35293093, -0.0316438 ,  0.03792984,
          0.17343847, -0.13260126, -0.15357377, -0.23523533, -0.08442692,
         -0.00134139,  0.03210667, -0.08926154, -0.02670781, -0.09581181,
         -0.25792354, -0.01056999, -0.11071672,  0.03133569, -0.07444921,
          0.05670552, -0.02273796, -0.19461137, -0.04576054,  0.00947582,
          0.01806056, -0.1502624 , -0.07787319,  0.17577608,  0.01933258,
         -0.19744575, -0.03930519,  0.07374467,  0.20196027,  0.1476755 ,
         -0.0141539 ,  0.0172476 , -0.11976205,  0.09462971, -0.24318144,
         -0.02755127,  0.12169892,  0.14837705,  0.17079785,  0.06952555,
         -0.15360466,  0.02005427,  0.08199155, -0.19544823,  0.10653654,
          0.11468476, -0.21902837, -0.07677591, -0.06791042,  0.10587606,
          0.05921567, -0.06115703, -0.11171092,  0.23917492, -0.15791436,
         -0.13834007,  0.01942121,  0.01826206, -0.1869376 , -0.32023543,
          0.01407088,  0.34068239,  0.20081407, -0.19736893, -0.06809217,
         -0.0536031 , -0.03747801,  0.07641605,  0.09667511, -0.04759219,
         -0.09811257, -0.05056169, -0.01899303,  0.24124743, -0.04360487,
          0.00636286,  0.21936756,  0.01126812, -0.04524586, -0.00227514,
          0.02113587, -0.11754515, -0.05986457, -0.08348871, -0.07204333,
         -0.00065423, -0.14080839,  0.03825339,  0.08760153, -0.11946712,
          0.18282008,  0.03533202,  0.01670191, -0.00815773, -0.00964593,
         -0.06726068,  0.01727787,  0.16389623, -0.16469438,  0.25836599,
          0.20883362, -0.0511426 ,  0.0673226 ,  0.11310361,  0.08522274,
         -0.01515304, -0.03709226, -0.22167102, -0.18071674,  0.06958843,
          0.02523582, -0.01741696,  0.03869874])),
 ('reagan',
  array([-0.07375357,  0.10346214,  0.05566559,  0.0695718 , -0.05440289,
         -0.01463646, -0.00649548, -0.09569547,  0.09449113, -0.04624915,
          0.24829058, -0.09732288, -0.32321307, -0.02170691, -0.05216696,
          0.08848224, -0.17542514, -0.00346639, -0.21609756, -0.05386934,
          0.05907623,  0.05206383, -0.00552298, -0.03122731, -0.09512019,
         -0.28428546, -0.03761142, -0.10027184,  0.08096444, -0.08889769,
         -0.02716124, -0.03971988, -0.27534494, -0.16471916, -0.01999875,
          0.0016112 , -0.04278624, -0.04995842,  0.10131402, -0.01418073,
         -0.18135472,  0.03161946,  0.04568903,  0.17763813,  0.22100508,
          0.02493148, -0.03450565, -0.15785582,  0.11732896, -0.29776844,
          0.00568237,  0.09540917,  0.17411588,  0.11876013,  0.13139085,
         -0.14378829, -0.02134267,  0.11733156, -0.13780983,  0.06715038,
          0.05229622, -0.03175481,  0.03515494, -0.08317695,  0.04386129,
          0.05435885, -0.02304641, -0.12853993,  0.17707913, -0.1982398 ,
         -0.04517204,  0.13875599,  0.04342822, -0.15879723, -0.37596074,
         -0.05904536,  0.38085103,  0.13067873, -0.19391856, -0.06296147,
         -0.11909108, -0.07099086,  0.06326488, -0.01472369, -0.01706395,
         -0.02058249, -0.07531797,  0.04612498,  0.2340861 , -0.0592877 ,
          0.0136845 ,  0.22591993, -0.01238802, -0.04330973,  0.06063112,
          0.08095851, -0.14738627, -0.04548051, -0.11298802, -0.01335901,
          0.0267882 , -0.13438657,  0.00160147,  0.05713233, -0.17619428,
          0.14030355, -0.0064233 , -0.03354643,  0.02639963, -0.08354585,
         -0.03018388,  0.10711893,  0.20899144, -0.26083925,  0.24738196,
          0.14037034, -0.07651532,  0.08729064, -0.01822417,  0.03990374,
         -0.09170216, -0.05556964, -0.18002766, -0.18664891, -0.0356429 ,
         -0.04261817, -0.00853308, -0.01145224])),
 ('biden',
  array([-3.94153334e-02,  1.71419963e-01,  6.19103611e-02,  1.20813865e-03,
         -1.26029924e-01,  4.76813056e-02, -7.64570385e-02, -1.04280867e-01,
          7.64882043e-02, -2.97834557e-02,  2.05395296e-01, -3.52288187e-02,
         -2.62895197e-01,  3.47554944e-02,  4.22611237e-02,  1.28316373e-01,
         -9.96532217e-02, -8.62354189e-02, -1.99026316e-01, -9.20077860e-02,
         -4.13053622e-03, -1.84574572e-03,  7.12383687e-02, -8.01764894e-04,
         -1.72330856e-01, -2.43130982e-01, -6.68652281e-02, -1.27878666e-01,
         -5.89288399e-02, -1.38829440e-01,  2.90382653e-02, -7.59892464e-02,
         -1.80036172e-01, -6.59002885e-02, -8.85275006e-03, -5.45807369e-03,
         -7.63356015e-02, -8.49744529e-02,  1.49971962e-01,  4.51882090e-03,
         -1.37575701e-01,  6.32099435e-02,  2.69454066e-03,  2.15510115e-01,
          2.17975736e-01,  2.67775599e-02,  6.51229918e-02, -8.41554403e-02,
          1.19427636e-01, -2.07337141e-01,  3.73023227e-02,  5.67626469e-02,
          1.76523373e-01,  5.84511086e-02,  8.71435180e-02, -8.61171708e-02,
          3.91412154e-02,  1.81429118e-01, -1.66551292e-01,  1.15131907e-01,
          4.45637666e-02, -7.72319511e-02,  5.94520569e-03, -5.48956022e-02,
          2.15550780e-01,  1.62481323e-01, -4.54511605e-02, -1.22476727e-01,
          1.85941517e-01, -8.48648101e-02, -1.18062168e-01,  5.99945076e-02,
         -1.21605873e-01, -1.64801836e-01, -3.04567844e-01, -3.61244790e-02,
          3.68216187e-01,  4.10148762e-02, -3.01204383e-01, -1.31272703e-01,
         -2.96852887e-02,  8.83649476e-03,  8.78813304e-03,  6.53453544e-02,
         -5.47639839e-02, -1.27447486e-01, -9.06510800e-02,  8.45456216e-03,
          3.23339522e-01, -1.19672455e-01, -5.59653156e-02,  1.87589496e-01,
          3.12539935e-02, -7.29634017e-02,  5.08752726e-02,  3.91780846e-02,
         -1.11397795e-01, -2.93459818e-02, -1.01636231e-01, -1.65463705e-02,
         -3.56077775e-02, -1.16203457e-01, -7.09438622e-02,  8.43822733e-02,
         -1.63478285e-01,  1.42183632e-01, -2.51595862e-02, -6.60387129e-02,
         -7.88670257e-02,  1.51667395e-04, -6.92283735e-02, -1.20748542e-02,
          2.38132715e-01, -2.04149559e-01,  2.09254473e-01,  2.22607568e-01,
         -3.95926982e-02,  5.09544238e-02, -4.50846851e-02,  9.35904682e-02,
         -2.53209118e-02,  8.85853767e-02, -1.56866565e-01, -1.34990171e-01,
          1.49547346e-02, -2.53867805e-02, -4.60821483e-03,  5.20500951e-02])),
 ('clinton',
  array([-0.04508969,  0.11096706,  0.04037839, -0.02596069, -0.13629276,
          0.05832736, -0.00791329, -0.09079798,  0.07800291, -0.06751928,
          0.07836544, -0.01847955, -0.20820636, -0.01413895,  0.03679249,
          0.08727276, -0.09368371, -0.12994473, -0.13642883, -0.08290243,
         -0.05130973,  0.03095647, -0.07930112,  0.00835842, -0.1596638 ,
         -0.25370046, -0.05857152,  0.00354732,  0.06525847, -0.05510001,
          0.06141135,  0.07477431, -0.18673925, -0.01823538, -0.00579874,
          0.0627984 , -0.11167405, -0.04451566,  0.10954212,  0.03735855,
         -0.18664123,  0.02195602,  0.00140925,  0.2867555 ,  0.26321203,
          0.05795337,  0.03520405, -0.04231023,  0.10563868, -0.25683585,
          0.08387342,  0.10793852,  0.09012961,  0.11464255, -0.06066907,
         -0.10177629,  0.03965353,  0.15413532, -0.19503416,  0.06701506,
          0.10005981, -0.19811633, -0.06047497, -0.00161708,  0.1978557 ,
          0.1070812 , -0.04738703, -0.16971821,  0.19489385, -0.20560056,
         -0.09854932,  0.05298131, -0.03782267, -0.10221337, -0.27675027,
          0.07568289,  0.41247922,  0.1883022 , -0.15817063, -0.00522887,
         -0.08230484,  0.06390021,  0.11653241,  0.06747078, -0.07001634,
         -0.14668852, -0.05507295,  0.0080837 ,  0.27537596, -0.01530384,
         -0.0812595 ,  0.18434253,  0.09375615, -0.06444204,  0.03225123,
         -0.00673253, -0.05144547,  0.0044758 , -0.05725317, -0.09340777,
          0.08545437, -0.05549682,  0.06186868,  0.14917053, -0.19328932,
          0.37587884,  0.02196722,  0.06623412,  0.13318247, -0.02438419,
         -0.04212693, -0.01964648,  0.19658604, -0.23843254,  0.17401865,
          0.14546792,  0.03816431,  0.1315241 ,  0.07140416,  0.12920363,
         -0.00130752, -0.01432464, -0.17555533, -0.08830181,  0.08549964,
         -0.03470356,  0.08567261,  0.07909804])),
 ('bush',
  array([-0.02625503,  0.15450698,  0.00472489, -0.03476544, -0.15254432,
          0.05891469, -0.07644974, -0.04859757,  0.05708186,  0.0612596 ,
          0.22733507, -0.0701563 , -0.25514913, -0.01923161, -0.00678502,
          0.14752488, -0.06545286, -0.08778823, -0.1775232 , -0.08251506,
         -0.11398399, -0.03021468,  0.02837485, -0.00353198, -0.07156367,
         -0.30014682, -0.02000125, -0.09253028,  0.09856176, -0.11015291,
         -0.06019321,  0.03246266, -0.27596986, -0.08158397,  0.06055886,
          0.06587014, -0.13416417, -0.05833671,  0.22474164, -0.0013185 ,
         -0.10397325,  0.05651437,  0.10324919,  0.24441974,  0.2173799 ,
          0.04672227,  0.02969756, -0.02270294,  0.13914412, -0.26273748,
          0.08655584,  0.1326735 ,  0.14884262,  0.09532433,  0.08704877,
         -0.16298953,  0.02034645,  0.1143412 , -0.15999599,  0.03486384,
         -0.00913465,  0.02462598, -0.12437406, -0.12725222,  0.28271464,
          0.14624983, -0.05642514, -0.13049671,  0.19838168, -0.09549382,
          0.00749957,  0.06430215, -0.09843632, -0.1162481 , -0.27670771,
          0.08604768,  0.28489399,  0.06307237, -0.23341507,  0.01152432,
          0.0209262 , -0.06046003, -0.01813189, -0.02722367, -0.11043029,
         -0.02944648, -0.04436475,  0.00585274,  0.27488008, -0.12345333,
          0.0385978 ,  0.1651496 ,  0.01608193, -0.02587827,  0.04746444,
          0.02921766, -0.06794278, -0.09760372, -0.19656235, -0.00629183,
          0.04683928, -0.15104854, -0.06103569,  0.07032564, -0.22625379,
          0.07374706,  0.01419976, -0.063246  , -0.04633452, -0.03164241,
         -0.0846636 , -0.04456606,  0.2119094 , -0.26451644,  0.29778484,
          0.15534681, -0.02818482,  0.03885474,  0.06340384,  0.03906147,
          0.02593869,  0.0397716 , -0.09250654, -0.16882761,  0.05614127,
         -0.04519947, -0.08137785,  0.05687324])),
 ('obama',
  array([-0.08126248,  0.11014761, -0.01082448, -0.05268792,  0.01033717,
         -0.00813808, -0.07251769, -0.0922646 ,  0.19304723, -0.09246384,
          0.23908533,  0.06770287, -0.22123125, -0.14119367,  0.05562152,
          0.13705511, -0.17924425, -0.07232306, -0.11183281, -0.11370818,
          0.03956435, -0.01019115,  0.0942178 ,  0.04493114, -0.12757549,
         -0.34449592, -0.05374938, -0.17703305,  0.00868226, -0.09721592,
         -0.09817187,  0.00278232, -0.17721367, -0.12789807,  0.03489833,
         -0.01991234, -0.00838933, -0.00132862,  0.18308581,  0.02447568,
         -0.1237656 ,  0.09957044,  0.02323568,  0.22983313,  0.2838524 ,
          0.06655717, -0.00558912, -0.09833544,  0.10058986, -0.23476946,
          0.06006292,  0.1341591 ,  0.08373027,  0.03900504,  0.10194533,
         -0.19337648,  0.01717628,  0.08978969, -0.16005114,  0.01892098,
          0.03082444, -0.06051281, -0.04526773,  0.04633342,  0.20678686,
          0.10299795, -0.12437531, -0.0490516 ,  0.12252682, -0.0280379 ,
          0.04286709,  0.01386448, -0.18673278, -0.22230086, -0.23382807,
          0.07662997,  0.3739067 ,  0.18993473, -0.20606737,  0.0244521 ,
         -0.18487695,  0.04949443,  0.09670059, -0.00123947, -0.0688749 ,
         -0.13414779, -0.04103697,  0.06179445,  0.07108597,  0.02471443,
         -0.04194992,  0.22575834, -0.02099216,  0.04838851,  0.02123774,
          0.05604827, -0.14657559, -0.02243515, -0.17425998, -0.06527615,
          0.02526723, -0.04892462,  0.05058879,  0.13397783, -0.23065181,
          0.06054964,  0.02031171, -0.0191126 ,  0.03631671,  0.06983903,
         -0.03496742, -0.03203756,  0.05493437, -0.24500111,  0.24993542,
          0.24442896,  0.04081136,  0.16412377,  0.06019448,  0.0062465 ,
         -0.00932551, -0.02752422, -0.17775527, -0.03611944,  0.04782868,
          0.06031797,  0.06846569,  0.00587987]))]
def get_nearest_face(name, top=5):
    """Print the faces closest to `name`, nearest first.

    Args:
        name: Key in embedding_dict whose neighbours are searched.
        top: Maximum number of neighbours to print. Fewer lines are printed
            when embedding_dict holds fewer other entries.
    """
    sort_key_func = get_sort_key_func(name)

    # Measure every other entry exactly once. Excluding `name` by key, rather
    # than dropping whatever sorts first, keeps the result right even when
    # another face is at distance 0 from `name`.
    candidates = [(other, sort_key_func(other))
                  for other in embedding_dict if other != name]
    candidates.sort(key=lambda pair: pair[1])

    # Slicing never runs past the end, so asking for more neighbours than
    # exist no longer raises IndexError.
    for rank, (other, distance) in enumerate(candidates[:max(top, 0)], start=1):
        print('순위 {} : 이름({}), 거리({})'.format(rank, other, distance))


# Who looks most like obama?
get_nearest_face('obama')
순위 1 : 이름(biden), 거리(0.846720652777963)
순위 2 : 이름(bush), 거리(0.8639189288560225)
순위 3 : 이름(reagan), 거리(0.8692435806803928)
순위 4 : 이름(trump), 거리(0.8885117374787042)
순위 5 : 이름(clinton), 거리(0.9000994624487408)

self project

import os

# Build the celebrity image directory under the user's home directory.
# os.path.expanduser('~') still resolves when the HOME variable is unset,
# whereas os.getenv('HOME') would return None and make the string
# concatenation raise TypeError.
dir_path = os.path.join(
    os.path.expanduser('~'), 'aiffel', 'face_embedding', 'actor_image', 'celebrity')
file_list = os.listdir(dir_path)

print("file_list: {}".format(file_list))

file_list: ['잭 블랙.jpeg', '박정민.jpg', '송중기.jpeg', '윤경신.jpg', '잭 갤리퍼내키스.jpg', '리암 니슨.png', '태양.jpeg', '유재석.jpg', '최무성.jpg', '아이즈원김채원.jpg', '이희준.jpg', '수지.jpg', '손호준.jpeg', '박완규.png', '김광현.jpg', '장성호.jpeg', '피어스 브로스넌.jpg', '트와이스모모.jpeg', '차범근.jpeg', '정준하.jpg', '문별.jpg', '박보영.jpg', '김구라.jpg', '유지태.jpg', '강동원.jpg', '수호.jpg', '최양락.jpg', '이근대위.jpeg', '최현석.jpg', '세훈.jpg', '슬리피.jpg', '김남주.jpeg', '사쿠라.jpg', '방시혁.jpeg', '김영삼.jpeg', '영알남.png', '박보검.jpg', '강민경.jpg', '슈퍼비.jpeg', '조승우.jpeg', '김종서.jpg', '효정.jpg', '이안 굿펠로우.jpg', '박찬호.jpg', '장도연.jpg', '아이린.jpeg', '뷔.png', '저스틴 존슨.jpeg', '지드래곤.jpeg', '고창석.jpeg', '트와이스쯔위.jpg', '이말년.png', '리아.jpg', '채연.jpg', '레인보우재경.jpg', '조이.jpg', '최준석.jpg', 'RM.jpg', '개리.jpg', '클린트 이스트우드.jpg', '성룡.jpg', '박서준.jpeg', '아이즈원조유리.jpeg', '박용택.jpg', '드웨인 존슨.jpg', '지호.jpg', '도지한.jpg', '찬열.jpeg', '이승기.jpeg', '레오나르도 디카프리오.jpeg', '노홍철.jpeg', '이승환.jpg', '이명박.jpg', '이정재.jpeg', '마크 러팔로.jpeg', '유리.jpg', '여자아이들수진.jpeg', '류현진.jpg', '이진호.jpeg', '윤종신.jpeg', '윤균상.jpg', '이광수.jpg', '공유.jpg', '차승원.jpg', '서장훈.jpg', '신비.jpg', '내사진2.jpg', '주진모(올드).jpg', '박희순.jpg', '박신양.png', '이경규.jpeg', '크리스 에반스.jpg', '돈스파이크.jpg', '유해진.jpg', '신동엽.jpg', '대도서관.jpeg', '슬기.jpg', '무라니시.png', '박명수.jpg', '하정우.jpg', '존 파브르.jpeg', 'obama.jpg', '김현수.jpeg', '케빈 스페이시.jpeg', '태연.jpg', '로다주.jpeg', '황정민.jpg', '정수빈.jpg', '이재용.jpg', '숀 코네리.jpg', '김래원.jpeg', '철면수심.jpg', '케이.jpg', '줄리엔강.png', 'bush.jpeg', '스윙스.jpeg', '봉준호.jpg', '이용진.jpg', '옥냥이.jpeg', '잇섭.png', '윤아.jpg', '조정석.jpg', '로제.jpg', '이성민.jpg', '아이유.jpg', '온앤오프효진.jpg', '장원영.jpeg', '티모시 달튼.jpeg', '류승범.jpg', '길.jpg', '벤 에플렉.jpeg', '이제훈.jpeg', '서강준.jpg', '박진영.jpg', '톰 홀랜드.jpg', '조정치.jpg', '박찬욱.jpeg', 'clinton.jpeg', '박나래.jpeg', '차명석.jpg', '이준혁.jpeg', '도아.jpeg', '승희.jpeg', '트와이스미나.jpg', '이대호.jpeg', '박재범.jpg', 'b1a4진영.jpeg', '사무엘 L 잭슨.jpg', '티파니.jpg', '김연우.jpeg', '이동휘.jpg', '레드벨벳웬디.jpg', '류준열.jpg', '크리스탈.jpg', '최민식.jpg', '제이크 질렌할.jpeg', '제프리 힌턴.jpg', '지민(BTS).jpg', '손흥민.jpg', '내사진.png', '권현빈.jpg', '장동건.jpeg', '조인성.jpg', '류진.jpg', '산들.jpeg', '배성우.png', 
'백윤식.png', '정민철.jpg', '리사.jpg', 'trump.jpg', '민니.jpg', '서현진.jpg', '차두리.jpeg', '조달환.png', '심수창.jpeg', '박건우.png', '로저 무어.jpeg', '윤도현.png', '이천수.jpg', '카이.jpg', '권태원.jpeg', '황병국.jpg', '디오.jpeg', '유아.jpg', '김민재.jpeg', '이승철.jpeg', '하현우.jpg', '이범수.jpeg', '잇지예지.jpeg', '화사.jpg', '김대명.jpeg', '박휘순.jpg', '이종석.jpeg', '배성재.jpg', '김경진.png', '나인뮤지스경리.jpg', '앤드류 응.png', '켄 정.jpeg', '박해일.jpeg', '비니.jpg', '진.jpg', '온앤오프와이엇.jpg', '크리스 햄스워스.jpg', '김희철.jpg', '트와이스사나.jpg', '공효진.jpg', '러블리즈예인.jpg', '성시경.jpg', '백현.jpg', '주호민.jpeg', '김대중.jpg', '잭 스페이더.jpeg', '한효주.jpg', '박성호.jpg', '기성용.jpg', '홍구.jpg', '유노윤호.jpg', '.ipynb_checkpoints', '제이홉.jpg', '다니엘 크레이그.jpeg', '지디.jpg', 'biden.jpg', '박진영.jpeg', '전소미.jpg', '오재원.jpg', '이민호.jpeg', '개코.jpeg', '한석규.jpg', '솔라.jpg', '야마다 타카유키.jpeg', '목진화.png', '정국.jpg', '아이즈원민주.jpg', '아놀드 슈워제네거.jpg', '트와이스다현.jpeg', '슈가.jpg', '딕헌터.jpeg', '손예진.jpeg', '이효리.jpg', '이동국.jpeg', '미미.jpg', '장원준.jpeg', '잇지유나.jpg', '김고은.jpeg', '이영하.jpg', '김유정.jpg', '택연.jpg', '트와이스나연.jpg', '제이콥 배털론.jpg', '문재인.jpeg', '유희열.jpeg', '강미나.jpg', '마동석.jpeg', '제니.jpg', '하승진.jpg', '김윤석.jpeg', '신태용.jpg', '꽈두룹.jpeg', '매드클라운.jpeg', '노무현.jpeg', '안재홍.jpg', '박항서.jpg', '김상호.png', '한기범.jpg', '김학범.jpg', '조원희.jpg', '정우성.jpeg', '첸.jpg', '잭 니콜슨.jpeg', '지수.jpg', '아린.jpg', '정형돈.jpg', '류승룡.jpeg', '이연걸.jpeg', '지코.jpg', '씨잼.jpg', '우기.jpg', '시우민.jpg', '러블리즈지수.jpeg', '김C.jpeg', '브래들리 쿠퍼.jpg', '트와이스정연.jpg', '이대형.jpg', '곽도원.jpg', '온앤오프라운.jpg']

import face_recognition
import os

def get_cropped_face(image_file):
    """Load an image file and return the crop of the first detected face.

    The image is read with face_recognition.load_image_file and scanned with
    face_recognition.face_locations. Only the first reported location is
    used; it is unpacked as (top, right, bottom, left) and applied as a
    row/column slice on the image array.

    Returns the sliced image array, or an empty list when no face is found.
    """
    picture = face_recognition.load_image_file(image_file)
    detections = face_recognition.face_locations(picture)

    # Guard clause: nothing detected, so hand back the empty-list sentinel.
    if not detections:
        return []

    top, right, bottom, left = detections[0]
    return picture[top:bottom, left:right, :]
image_file = os.path.join(dir_path, '내사진.png')
cropped_face = get_cropped_face(image_file)   # 얼굴 영역을 구하는 함수(이전 스텝에서 구현)
# 이미지 확인
%matplotlib inline

plt.imshow(cropped_face)


<matplotlib.image.AxesImage at 0x7f8c0eb58d50>

png

# 얼굴 영역을 가지고 얼굴 임베딩 벡터를 구하는 함수
def get_face_embedding(face):
    """Compute face embedding vectors for a (cropped) face image.

    Delegates to face_recognition.face_encodings and returns its result
    unchanged: a list of embedding arrays, which is empty when no face
    could be encoded from the given image.
    """
    encodings = face_recognition.face_encodings(face)
    return encodings

embedding = get_face_embedding(cropped_face)  
embedding
[array([-1.30213648e-01,  7.61276111e-02,  2.99772192e-02, -9.78113860e-02,
        -1.26794696e-01, -7.33424956e-03, -1.03005432e-01, -1.10268332e-01,
         1.09191120e-01, -1.00193754e-01,  2.65007794e-01, -3.94430272e-02,
        -2.03418165e-01, -1.35539278e-01, -4.18169536e-02,  1.84405908e-01,
        -1.90744892e-01, -1.10041559e-01, -1.54919485e-02,  5.96662611e-03,
         1.27282187e-01, -4.10036482e-02,  4.96505685e-02,  3.33889164e-02,
        -1.36211574e-01, -3.46584439e-01, -1.15685523e-01, -5.55737689e-02,
         2.24363469e-02, -8.00758377e-02, -7.97620043e-02, -4.75314036e-02,
        -2.44764850e-01, -4.65895236e-02, -2.68432172e-03,  1.10404730e-01,
         3.53167532e-04, -5.90883531e-02,  1.04027458e-01, -3.17025930e-02,
        -2.21672118e-01,  3.53468247e-02,  5.99180944e-02,  1.97162211e-01,
         1.89511076e-01,  4.50326204e-02,  1.36085851e-02, -1.35148957e-01,
         1.18577898e-01, -1.92472026e-01,  6.90812171e-02,  1.37558520e-01,
         7.89537951e-02,  2.44136415e-02, -1.36873992e-02, -4.34710607e-02,
         4.28553149e-02,  1.80762514e-01, -1.75069019e-01,  5.80034181e-02,
         1.13410726e-01, -4.01484258e-02, -8.05326626e-02, -9.72752348e-02,
         1.83926955e-01,  1.15356073e-01, -1.59244210e-01, -2.11863786e-01,
         1.05406880e-01, -1.38345018e-01, -7.54930675e-02,  6.57209978e-02,
        -1.42996475e-01, -2.18930021e-01, -3.43371958e-01,  4.97271493e-03,
         3.53380352e-01,  1.14617534e-01, -2.37590805e-01, -1.11302901e-02,
        -6.54153377e-02,  1.75028630e-02,  1.22302383e-01,  1.24278322e-01,
         3.19278240e-02,  1.56243779e-02, -1.52367562e-01,  3.13660093e-02,
         2.29110181e-01, -9.06396285e-02,  2.11916175e-02,  2.76709080e-01,
         3.37411538e-02,  7.34530464e-02, -1.17159486e-02,  1.44871473e-01,
        -9.05603617e-02,  1.34647358e-02, -1.39537767e-01, -7.18771527e-03,
         5.03335074e-02, -2.74904091e-02, -5.31561449e-02,  1.12458110e-01,
        -1.34873509e-01,  1.45828113e-01, -4.14934475e-03,  2.73635052e-02,
         9.90682282e-03, -7.50039518e-02, -8.10605213e-02, -9.15383697e-02,
         9.70911160e-02, -2.10494280e-01,  1.78827986e-01,  2.31571674e-01,
         8.27177837e-02,  8.31201226e-02,  1.15141928e-01,  6.48200661e-02,
         3.67616862e-03, -6.09585382e-02, -2.08133847e-01, -3.55739035e-02,
         3.32320742e-02, -8.58368278e-02,  3.45425606e-02, -7.26099638e-03])]
def get_face_embedding_dict(dir_path):
    """Build name -> embedding and name -> cropped-face maps for a directory.

    Every entry of `dir_path` whose extension is .png, .jpg or .jpeg
    (case-insensitive) is passed through get_cropped_face and
    get_face_embedding. Entries with no detected face, or for which no
    embedding could be computed, are skipped.

    Args:
        dir_path: Directory containing the face images.

    Returns:
        A tuple (embedding_dict, cropped_dict). Both are keyed by the file
        name with its extension removed; the values are the first embedding
        returned for the face and the cropped face image respectively.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    embedding_dict = {}
    cropped_dict = {}

    for file in os.listdir(dir_path):
        # name is the file name without its extension; it becomes the dict key.
        name, ext = os.path.splitext(file)
        if ext.lower() not in image_exts:
            continue

        face = get_cropped_face(os.path.join(dir_path, file))
        # get_cropped_face returns an empty list when no face is found.
        # Use a length check rather than `face == []`: comparing a NumPy
        # array with a list is an elementwise comparison that fails to
        # broadcast (DeprecationWarning on old NumPy, an error on new ones).
        if len(face) == 0:
            continue

        embedding = get_face_embedding(face)
        # The encoder may still find no face inside the crop, in which case
        # the returned list is empty and the file is skipped.
        if len(embedding) > 0:
            embedding_dict[name] = embedding[0]
            cropped_dict[name] = face

    return embedding_dict, cropped_dict


embedding_dict, cropped_dict = get_face_embedding_dict(dir_path)
embedding_dict['내사진']
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:11: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
  # This is added back by InteractiveShellApp.init_path()





array([-1.30213648e-01,  7.61276111e-02,  2.99772192e-02, -9.78113860e-02,
       -1.26794696e-01, -7.33424956e-03, -1.03005432e-01, -1.10268332e-01,
        1.09191120e-01, -1.00193754e-01,  2.65007794e-01, -3.94430272e-02,
       -2.03418165e-01, -1.35539278e-01, -4.18169536e-02,  1.84405908e-01,
       -1.90744892e-01, -1.10041559e-01, -1.54919485e-02,  5.96662611e-03,
        1.27282187e-01, -4.10036482e-02,  4.96505685e-02,  3.33889164e-02,
       -1.36211574e-01, -3.46584439e-01, -1.15685523e-01, -5.55737689e-02,
        2.24363469e-02, -8.00758377e-02, -7.97620043e-02, -4.75314036e-02,
       -2.44764850e-01, -4.65895236e-02, -2.68432172e-03,  1.10404730e-01,
        3.53167532e-04, -5.90883531e-02,  1.04027458e-01, -3.17025930e-02,
       -2.21672118e-01,  3.53468247e-02,  5.99180944e-02,  1.97162211e-01,
        1.89511076e-01,  4.50326204e-02,  1.36085851e-02, -1.35148957e-01,
        1.18577898e-01, -1.92472026e-01,  6.90812171e-02,  1.37558520e-01,
        7.89537951e-02,  2.44136415e-02, -1.36873992e-02, -4.34710607e-02,
        4.28553149e-02,  1.80762514e-01, -1.75069019e-01,  5.80034181e-02,
        1.13410726e-01, -4.01484258e-02, -8.05326626e-02, -9.72752348e-02,
        1.83926955e-01,  1.15356073e-01, -1.59244210e-01, -2.11863786e-01,
        1.05406880e-01, -1.38345018e-01, -7.54930675e-02,  6.57209978e-02,
       -1.42996475e-01, -2.18930021e-01, -3.43371958e-01,  4.97271493e-03,
        3.53380352e-01,  1.14617534e-01, -2.37590805e-01, -1.11302901e-02,
       -6.54153377e-02,  1.75028630e-02,  1.22302383e-01,  1.24278322e-01,
        3.19278240e-02,  1.56243779e-02, -1.52367562e-01,  3.13660093e-02,
        2.29110181e-01, -9.06396285e-02,  2.11916175e-02,  2.76709080e-01,
        3.37411538e-02,  7.34530464e-02, -1.17159486e-02,  1.44871473e-01,
       -9.05603617e-02,  1.34647358e-02, -1.39537767e-01, -7.18771527e-03,
        5.03335074e-02, -2.74904091e-02, -5.31561449e-02,  1.12458110e-01,
       -1.34873509e-01,  1.45828113e-01, -4.14934475e-03,  2.73635052e-02,
        9.90682282e-03, -7.50039518e-02, -8.10605213e-02, -9.15383697e-02,
        9.70911160e-02, -2.10494280e-01,  1.78827986e-01,  2.31571674e-01,
        8.27177837e-02,  8.31201226e-02,  1.15141928e-01,  6.48200661e-02,
        3.67616862e-03, -6.09585382e-02, -2.08133847e-01, -3.55739035e-02,
        3.32320742e-02, -8.58368278e-02,  3.45425606e-02, -7.26099638e-03])
def get_distance(name1, name2):
    """Return the L2 distance between the embeddings stored for two names.

    Both names are looked up in the module-level `embedding_dict`; a missing
    name raises KeyError from that lookup.
    """
    first = embedding_dict[name1]
    second = embedding_dict[name2]
    return np.linalg.norm(first - second, ord=2)
# 내 사진으로 비교
get_distance('내사진','내사진2')
0.30859142484350327
def get_nearest_face(name, top=5):
    """Placeholder for the nearest-face lookup; does nothing and returns None."""
    return None
get_nearest_face('내사진')
# name1과 name2의 거리를 비교하는 함수를 생성하되, name1은 미리 지정하고, name2는 호출시에 인자로 받도록 합니다.
def get_sort_key_func(name1):
    """Return a one-argument function measuring distance from `name1`.

    The returned callable takes `name2` and yields
    get_distance(name1, name2), which makes it usable as a sort key over
    names.
    """
    return lambda name2: get_distance(name1, name2)

sort_key_func = get_sort_key_func('내사진')   
# 이렇게 생성된 함수 sort_key_func는 sort_key_func('obama') 라고 호출할 때 '내사진'과 obama 사이의 임베딩 벡터 거리를 계산합니다.
sorted(embedding_dict.items(), key=lambda x:sort_key_func(x[0]))
[('내사진',
  array([-1.30213648e-01,  7.61276111e-02,  2.99772192e-02, -9.78113860e-02,
         -1.26794696e-01, -7.33424956e-03, -1.03005432e-01, -1.10268332e-01,
          1.09191120e-01, -1.00193754e-01,  2.65007794e-01, -3.94430272e-02,
         -2.03418165e-01, -1.35539278e-01, -4.18169536e-02,  1.84405908e-01,
         ...
         ...
def get_nearest_face(name, top=5, print_images=False):
    """Print (and optionally plot) the faces closest to `name`.

    All names in the module-level `embedding_dict` are ordered by their
    embedding distance to `name`. The first entry is expected to be `name`
    itself (distance 0) and is excluded from the ranking.

    Args:
        name: Key in `embedding_dict` to compare against.
        top: Maximum number of nearest faces to report. If fewer faces are
            available, only those are reported.
        print_images: When True, draw the query face on the first row and
            the ranked faces on the second row, using `cropped_dict`.

    Returns:
        None. Results are printed and, optionally, plotted.
    """
    sort_key_func = get_sort_key_func(name)
    sorted_names = sorted(embedding_dict, key=sort_key_func)

    # Slice instead of indexing 1..top so a small dictionary cannot raise
    # IndexError; max() keeps a non-positive `top` from slicing from the end.
    nearest = sorted_names[1:max(top, 0) + 1]
    for rank, other in enumerate(nearest, start=1):
        print('순위 {} : 이름({}), 거리({})'.format(rank, other, sort_key_func(other)))

    if print_images:
        # Two rows of `cols` cells: the query face in the first cell of row
        # one, the matches filling row two. The second-row offset must be
        # the column count, not a hard-coded 5, or any other `top` breaks.
        cols = max(top, 1)
        fig = plt.figure(figsize=(15, 5))
        fig.add_subplot(2, cols, 1)
        plt.imshow(cropped_dict[sorted_names[0]])
        for rank, other in enumerate(nearest, start=1):
            fig.add_subplot(2, cols, rank + cols)
            plt.imshow(cropped_dict[other])

# '내사진'과 가장 닮은 사람은 누굴까요?
get_nearest_face('내사진',print_images=True)
순위 1 : 이름(내사진2), 거리(0.30859142484350327)
순위 2 : 이름(택연), 거리(0.4042503765985488)
순위 3 : 이름(박재범), 거리(0.4209329811135236)
순위 4 : 이름(제이홉), 거리(0.4308021182146934)
순위 5 : 이름(카이), 거리(0.43823314623915355)

png




Leave a comment