Kei Minagawa's Blog

皆川圭(@keimina)のブログ、Pythonで試したことを書いていきます

二値画像にk-meansを適用し、細線化?してみる

前回の続きです。今度は二値画像にk-meansを適用し、細線化?してみます。
二値画像においてk-meansが細線化や情報削減に利用できるのではないかと思えるような結果になりました。今回は、scipyのkmeans2を使用し、代表点の初期配置を領域内に収めることで、細線化?を実現します。

ソース:
(関係のないモジュールをimportしまくってますが無視してください。)

# -*- coding: utf-8 -*-
from itertools import combinations
from itertools import product
import random
import re
import numpy as np
from numpy import arange
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
from scipy.fftpack import fft, ifft
from scipy.signal import resample
from scipy.spatial import Delaunay
import matplotlib.tri as tri
from scipy.cluster.vq import kmeans,vq
from scipy.cluster.vq import kmeans2,vq

def rawObjectToMatrix(ro):
    ret = []
    lines = ro.splitlines()
    for line in lines:
        if line.strip()=="":
            continue
        ret.append(map(lambda x: 0 if x == " " else 1, line))
    whiteRow = [[0 for _ in xrange(len(ret[0]))]]
    return whiteRow + ret + whiteRow

def pb(lst2d):
    ret = ""
    for lst in lst2d:
        lst = map(abs, lst)
        ret += "".join(map(str, lst)) + "\n"
    print ""
    print ret

def readMyData():
    with open("a.txt", 'r') as fp:
        text = fp.read()
    objectRawDataList = re.split(r"\n\n", text)
    objectRawDataList = filter(lambda x: x != "", objectRawDataList)
    return map(lambda ro: rawObjectToMatrix(ro), objectRawDataList)

lst = readMyData()

sampleMatrix = zoom(np.array(lst[5]), 1.5)
pb(sampleMatrix)

height = sampleMatrix.shape[0]
width = sampleMatrix.shape[1]

mat = sampleMatrix.copy()
for rn, _ in enumerate(mat):
    for cn, __ in enumerate(_):
        if mat[rn, cn] == 1 and mat[rn + 1, cn] == 1:
            mat[rn, cn] = 0

out = mat
mat = sampleMatrix.copy()
for rn, _ in enumerate(mat):
    for cn, __ in enumerate(_):
        if mat[(height - 1) - rn, cn] == 1 and mat[(height - 1) - rn - 1, cn] == 1:
            mat[(height - 1) - rn, cn] = 0

out |= mat
mat = sampleMatrix.copy()
for rn, _ in enumerate(mat):
    for cn, __ in enumerate(_):
        if mat[rn, cn] == 1 and mat[rn, cn + 1] == 1:
            mat[rn, cn] = 0

out |= mat
mat = sampleMatrix.copy()
for rn, _ in enumerate(mat):
    for cn, __ in enumerate(_):
        if mat[rn, (width - 1) - cn] == 1 and mat[rn, (width - 1) - cn - 1] == 1:
            mat[rn, (width - 1) - cn] = 0

out |= mat  

firstPosFlat = out.flatten().argmax()
pos = firstPosFlat / width, firstPosFlat % width
startPos = pos
code = []
while True:
    if   out[pos[0] - 1, pos[1]    ] == 1:
        out[pos[0] - 1, pos[1]    ] = 2
        pos = pos[0] - 1, pos[1]
        code.append((-1, 0))
    elif out[pos[0] - 1, pos[1] + 1] == 1:
        out[pos[0] - 1, pos[1] + 1] = 2
        pos = pos[0] - 1, pos[1] + 1
        code.append((-1, 1))
    elif out[pos[0]    , pos[1] + 1] == 1:
        out[pos[0]    , pos[1] + 1] = 2
        pos = pos[0]    , pos[1] + 1
        code.append((0, 1))
    elif out[pos[0] + 1, pos[1] + 1] == 1:
        out[pos[0] + 1, pos[1] + 1] = 2
        pos = pos[0] + 1, pos[1] + 1
        code.append((1, 1))
    elif out[pos[0] + 1, pos[1]    ] == 1:
        out[pos[0] + 1, pos[1]    ] = 2
        pos = pos[0] + 1, pos[1]    
        code.append((1, 0))
    elif out[pos[0] + 1, pos[1] - 1] == 1:
        out[pos[0] + 1, pos[1] - 1] = 2
        pos = pos[0] + 1, pos[1] - 1
        code.append((1, -1))
    elif out[pos[0]    , pos[1] - 1] == 1:
        out[pos[0]    , pos[1] - 1] = 2
        pos = pos[0]    , pos[1] - 1
        code.append((0, -1))
    elif out[pos[0] - 1, pos[1] - 1] == 1:
        out[pos[0] - 1, pos[1] - 1] = 2
        pos = pos[0] - 1, pos[1] - 1
        code.append((-1, -1))
    if pos == startPos:
        break

pr = startPos[0]
pc = startPos[1]
prlst = []
pclst = []
p = []
for n, c in enumerate(code):
    dr, dc = c
    pr += dr
    pc += dc
    prlst.append(pr)
    pclst.append(pc)
    p.append((pr, pc))

prlst = np.array(prlst)
pclst = np.array(pclst)

fig, ax = plt.subplots(1)
ax.plot(pclst, prlst)

numOfCentroids = 10
k = np.array(random.sample(zip(*np.where(sampleMatrix == 1)), numOfCentroids), dtype=np.float)
observation = np.array(zip(*np.where(sampleMatrix==1)), dtype=np.float)

# computing K-Means with K = 2 (2 clusters)
centroids,_ = kmeans2(observation, k, minit="point")

# assign each sample to a cluster
idx,_ = vq(observation,centroids)

centroidsRow, centroidsCol = centroids.T
centroidsRow = map(lambda x: int(round(x)), centroidsRow)
centroidsCol = map(lambda x: int(round(x)), centroidsCol)

sampleMatrixDummy = sampleMatrix.copy()
sampleMatrixDummy[centroidsRow, centroidsCol] = 5
pb(sampleMatrixDummy)

ax.plot(centroids[:,1], centroids[:,0], "sm")
fig.savefig("/Users/kei/Desktop/saisenka.png")

出力結果:

f:id:keimina:20160609223935p:plain

赤い点が周辺領域の代表点になります。形状の情報をよく表しているのではないかと思います。k-meansは形状の情報削減、圧縮、細線化などに使えそうなことがわかると思います。