前回の続きです。今度は二値画像にk-meansを適用し、細線化?してみます。
二値画像においてk-meansが細線化や情報削減に利用できるのではないかと思えるような結果になりました。今回は、scipyのkmeans2を使用し、代表点の初期配置を領域内に収めることで、細線化?を実現します。
ソース:
(関係のないモジュールをimportしまくってますが無視してください。)
# -*- coding: utf-8 -*-
"""Thinning-like reduction of a binary image with k-means.

Reads ASCII-art objects from ``a.txt``, extracts a directional edge map,
walks the boundary as an 8-neighbour chain code, then places k-means
centroids (scipy ``kmeans2``, initialised from pixels inside the shape)
as representative points of the region.

Ported to Python 3 from the original Python 2 script:
  * print statements / xrange  ->  print() / range
  * row/col decoding uses // (Py3 true division would give a float index)
  * zip(...) is materialised before random.sample()
  * np.float (removed in NumPy 1.24)  ->  np.float64
The boundary trace also gains a guard against an infinite loop when the
contour dead-ends away from the start pixel.
"""
from itertools import combinations
from itertools import product
import random
import re

import numpy as np
from numpy import arange
import matplotlib.pyplot as plt
import matplotlib.tri as tri
from scipy.ndimage import zoom
from scipy.fftpack import fft, ifft
from scipy.signal import resample
from scipy.spatial import Delaunay
from scipy.cluster.vq import kmeans, kmeans2, vq

# NOTE(review): several imports above are unused here; the author kept
# them from earlier experiments, so they are preserved as-is.


def rawObjectToMatrix(ro):
    """Convert one raw ASCII-art object (space = background) to a 0/1 matrix.

    A blank row is prepended and appended so the vertical edge passes
    below never test a foreground pixel on the first or last row.
    """
    rows = []
    for line in ro.splitlines():
        if line.strip() == "":
            continue
        rows.append([0 if ch == " " else 1 for ch in line])
    whiteRow = [[0] * len(rows[0])]
    return whiteRow + rows + whiteRow


def pb(lst2d):
    """Print a 2-D matrix as one digit-string per row.

    abs() is applied per cell — presumably to neutralise negative
    overshoot values produced by the spline interpolation in zoom()
    (TODO confirm).
    """
    text = ""
    for row in lst2d:
        text += "".join(str(abs(v)) for v in row) + "\n"
    print("")
    print(text)


def readMyData():
    """Read a.txt and return one 0/1 matrix per blank-line-separated object."""
    with open("a.txt", "r") as fp:
        text = fp.read()
    blocks = [b for b in re.split(r"\n\n", text) if b != ""]
    return [rawObjectToMatrix(b) for b in blocks]


def _edgePass(matrix, dr, dc):
    """Return a copy of *matrix* where every foreground pixel whose
    neighbour in direction (dr, dc) is also foreground has been zeroed.

    The scan runs in the same direction as the neighbour offset and
    mutates in place, so only the last pixel of each run survives —
    i.e. this keeps one directional edge of the shape.  Replaces the
    four copy-pasted loops of the original.
    """
    mat = matrix.copy()
    h, w = mat.shape
    row_order = range(h) if dr >= 0 else range(h - 1, -1, -1)
    col_order = range(w) if dc >= 0 else range(w - 1, -1, -1)
    for rn in row_order:
        for cn in col_order:
            # Short-circuit keeps rn+dr / cn+dc in range as long as the
            # border is background (guaranteed vertically by the white
            # rows added in rawObjectToMatrix); negative indices wrap,
            # exactly as in the original arithmetic.
            if mat[rn, cn] == 1 and mat[rn + dr, cn + dc] == 1:
                mat[rn, cn] = 0
    return mat


# 8-neighbour priority used by the chain-code trace: N, NE, E, SE, S,
# SW, W, NW — the same order as the original elif chain.
_NEIGHBOURS = [(-1, 0), (-1, 1), (0, 1), (1, 1),
               (1, 0), (1, -1), (0, -1), (-1, -1)]


def _traceBoundary(out):
    """Walk the boundary in *out* starting at the first foreground pixel.

    Visited pixels are marked 2 in place.  Returns (startPos, code),
    where code is the list of (dr, dc) chain-code moves.  Unlike the
    original, a dead end away from the start breaks the loop instead of
    spinning forever.
    """
    width = out.shape[1]
    firstPosFlat = int(out.flatten().argmax())  # first pixel holding the max value
    startPos = pos = (firstPosFlat // width, firstPosFlat % width)
    code = []
    while True:
        for dr, dc in _NEIGHBOURS:
            nr, nc = pos[0] + dr, pos[1] + dc
            if out[nr, nc] == 1:
                out[nr, nc] = 2
                pos = (nr, nc)
                code.append((dr, dc))
                break
        else:
            # No unvisited neighbour: isolated pixel or open curve.
            break
        if pos == startPos:  # contour closed
            break
    return startPos, code


def main():
    lst = readMyData()
    # Object #5, upsampled 1.5x (zoom keeps the int dtype of the input).
    sampleMatrix = zoom(np.array(lst[5]), 1.5)
    pb(sampleMatrix)

    # OR the four directional edge maps into a one-pixel-wide outline.
    out = _edgePass(sampleMatrix, 1, 0)
    out |= _edgePass(sampleMatrix, -1, 0)
    out |= _edgePass(sampleMatrix, 0, 1)
    out |= _edgePass(sampleMatrix, 0, -1)

    startPos, code = _traceBoundary(out)

    # Integrate the chain code back into absolute boundary coordinates.
    pr, pc = startPos
    prlst = []
    pclst = []
    for dr, dc in code:
        pr += dr
        pc += dc
        prlst.append(pr)
        pclst.append(pc)
    prlst = np.array(prlst)
    pclst = np.array(pclst)

    fig, ax = plt.subplots(1)
    ax.plot(pclst, prlst)

    # k-means with K = numOfCentroids clusters; the initial centroids
    # are sampled from foreground pixels so they start inside the
    # region (original comment wrongly said "K = 2").
    numOfCentroids = 10
    foreground = list(zip(*np.where(sampleMatrix == 1)))
    k = np.array(random.sample(foreground, numOfCentroids), dtype=np.float64)
    observation = np.array(foreground, dtype=np.float64)
    centroids, _ = kmeans2(observation, k, minit="point")
    idx, _ = vq(observation, centroids)  # cluster assignment (kept for inspection)

    centroidsRow = [int(round(x)) for x in centroids[:, 0]]
    centroidsCol = [int(round(x)) for x in centroids[:, 1]]
    sampleMatrixDummy = sampleMatrix.copy()
    sampleMatrixDummy[centroidsRow, centroidsCol] = 5  # mark centroids for printing
    pb(sampleMatrixDummy)

    ax.plot(centroids[:, 1], centroids[:, 0], "sm")  # magenta square markers
    fig.savefig("/Users/kei/Desktop/saisenka.png")   # NOTE(review): hard-coded path


if __name__ == "__main__":
    main()
出力結果:
マゼンタ色の四角い点が周辺領域の代表点になります。形状の情報をよく表しているのではないかと思います。k-meansは形状の情報削減、圧縮、細線化などに使えそうなことがわかると思います。