[EDIT]
While the solution below works, it can be made better. Here is a version with better names and better performance:
from itertools import product
from PIL import Image, ImageDraw
def closed_regions(image, test):
"""
Return all closed regions in image who's pixels satisfy test.
"""
pixel = image.load()
xs, ys = map(xrange, image.size)
neighbors = dict((xy, set([xy])) for xy in product(xs, ys) if test(pixel[xy]))
for a, b in neighbors:
for cd in (a + 1, b), (a, b + 1):
if cd in neighbors:
neighbors[a, b].add(cd)
neighbors[cd].add((a, b))
seen = set()
def component(node, neighbors=neighbors, seen=seen, see=seen.add):
todo = set([node])
next_todo = todo.pop
while todo:
node = next_todo()
see(node)
todo |= neighbors[node] - seen
yield node
return (set(component(node)) for node in neighbors if node not in seen)
def boundingbox(coordinates):
"""
Return the bounding box that contains all coordinates.
"""
xs, ys = zip(*coordinates)
return min(xs), min(ys), max(xs), max(ys)
def is_black_enough(pixel):
r, g, b = pixel
return r < 10 and g < 10 and b < 10
if __name__ == '__main__':
image = Image.open('some_image.jpg')
draw = ImageDraw.Draw(image)
for rect in disjoint_areas(image, is_black_enough):
draw.rectangle(boundingbox(region), outline=(255, 0, 0))
image.show()
Unlike disjoint_areas()
below, closed_regions()
returns sets of pixel coordinates instead of their bounding boxes.
Also, if we use flooding instead of the connected components algorithm, we can make it even simpler and about twice as fast:
from itertools import chain, product
from PIL import Image, ImageDraw
flatten = chain.from_iterable
def closed_regions(image, test):
"""
Return all closed regions in image who's pixel satisfy test.
"""
pixel = image.load()
xs, ys = map(xrange, image.size)
todo = set(xy for xy in product(xs, ys) if test(pixel[xy]))
while todo:
region = set()
edge = set([todo.pop()])
while edge:
region |= edge
todo -= edge
edge = todo.intersection(
flatten(((x - 1, y), (x, y - 1), (x + 1, y), (x, y + 1)) for x, y in edge))
yield region
# rest like above
It was inspired by Eric S. Raymond's version of floodfill.
[/EDIT]
One could probably use floodfill, but I like this:
from collections import defaultdict
from PIL import Image, ImageDraw
def connected_components(edges):
"""
Given a graph represented by edges (i.e. pairs of nodes), generate its
connected components as sets of nodes.
Time complexity is linear with respect to the number of edges.
"""
neighbors = defaultdict(set)
for a, b in edges:
neighbors[a].add(b)
neighbors[b].add(a)
seen = set()
def component(node, neighbors=neighbors, seen=seen, see=seen.add):
unseen = set([node])
next_unseen = unseen.pop
while unseen:
node = next_unseen()
see(node)
unseen |= neighbors[node] - seen
yield node
return (set(component(node)) for node in neighbors if node not in seen)
def matching_pixels(image, test):
"""
Generate all pixel coordinates where pixel satisfies test.
"""
width, height = image.size
pixels = image.load()
for x in xrange(width):
for y in xrange(height):
if test(pixels[x, y]):
yield x, y
def make_edges(coordinates):
"""
Generate all pairs of neighboring pixel coordinates.
"""
coordinates = set(coordinates)
for x, y in coordinates:
if (x - 1, y - 1) in coordinates:
yield (x, y), (x - 1, y - 1)
if (x, y - 1) in coordinates:
yield (x, y), (x, y - 1)
if (x + 1, y - 1) in coordinates:
yield (x, y), (x + 1, y - 1)
if (x - 1, y) in coordinates:
yield (x, y), (x - 1, y)
yield (x, y), (x, y)
def boundingbox(coordinates):
"""
Return the bounding box of all coordinates.
"""
xs, ys = zip(*coordinates)
return min(xs), min(ys), max(xs), max(ys)
def disjoint_areas(image, test):
"""
Return the bounding boxes of all non-consecutive areas
who's pixels satisfy test.
"""
for each in connected_components(make_edges(matching_pixels(image, test))):
yield boundingbox(each)
def is_black_enough(pixel):
r, g, b = pixel
return r < 10 and g < 10 and b < 10
if __name__ == '__main__':
image = Image.open('some_image.jpg')
draw = ImageDraw.Draw(image)
for rect in disjoint_areas(image, is_black_enough):
draw.rectangle(rect, outline=(255, 0, 0))
image.show()
Here, pairs of neighboring pixels that both satisfy is_black_enough()
are interpreted as edges in a graph. Also, every pixel is viewed as its own neighbor. Due to this re-interpretation we can use the connected component algorithm for graphs which is quite easy to implement. The result is the sequence of the bounding boxes of all areas who's pixels satisfy is_black_enough()
.