# Copyright 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
matplotlib.use('Agg')

import its.error
import pylab
import sys
import Image
import numpy
import math
import unittest
import cStringIO
import scipy.stats
import copy

DEFAULT_YUV_TO_RGB_CCM = numpy.matrix([
                                [1.000,  0.000,  1.402],
                                [1.000, -0.344, -0.714],
                                [1.000,  1.772,  0.000]])

DEFAULT_YUV_OFFSETS = numpy.array([0, 128, 128])

DEFAULT_GAMMA_LUT = numpy.array(
        [math.floor(65535 * math.pow(i/65535.0, 1/2.2) + 0.5)
         for i in xrange(65536)])

DEFAULT_INVGAMMA_LUT = numpy.array(
        [math.floor(65535 * math.pow(i/65535.0, 2.2) + 0.5)
         for i in xrange(65536)])

MAX_LUT_SIZE = 65536

def convert_capture_to_rgb_image(cap,
                                 ccm_yuv_to_rgb=DEFAULT_YUV_TO_RGB_CCM,
                                 yuv_off=DEFAULT_YUV_OFFSETS,
                                 props=None):
    """Convert a captured image object to an RGB image.

    Args:
        cap: A capture object as returned by its.device.do_capture.
        ccm_yuv_to_rgb: (Optional) the 3x3 CCM to convert from YUV to RGB.
        yuv_off: (Optional) offsets to subtract from each of Y,U,V values.
        props: (Optional) camera properties object (of static values);
            required for processing raw images.

    Returns:
        RGB float-3 image array, with pixel values in [0.0, 1.0].
    """
    w = cap["width"]
    h = cap["height"]
    if cap["format"] == "raw10":
        assert(props is not None)
        cap = unpack_raw10_capture(cap, props)
    if cap["format"] == "yuv":
        y = cap["data"][0:w*h]
        u = cap["data"][w*h:w*h*5/4]
        v = cap["data"][w*h*5/4:w*h*6/4]
        return convert_yuv420_to_rgb_image(y, u, v, w, h)
    elif cap["format"] == "jpeg":
        return decompress_jpeg_to_rgb_image(cap["data"])
    elif cap["format"] == "raw":
        assert(props is not None)
        r,gr,gb,b = convert_capture_to_planes(cap, props)
        return convert_raw_to_rgb_image(r,gr,gb,b, props, cap["metadata"])
    else:
        raise its.error.Error('Invalid format %s' % (cap["format"]))

def unpack_raw10_capture(cap, props):
    """Unpack a raw-10 capture to a raw-16 capture.

    Args:
        cap: A raw-10 capture object.
        props: Camera properties object.

    Returns:
        New capture object with raw-16 data.
    """
    # Data is packed as 4x10b pixels in 5 bytes, with the first 4 bytes holding
    # the MSBs of the pixels, and the 5th byte holding 4x2b LSBs.
    w,h = cap["width"], cap["height"]
    if w % 4 != 0:
        raise its.error.Error('Invalid raw-10 buffer width')
    cap = copy.deepcopy(cap)
    cap["data"] = unpack_raw10_image(cap["data"].reshape(h,w*5/4))
    cap["format"] = "raw"
    return cap

def unpack_raw10_image(img):
    """Unpack a raw-10 image to a raw-16 image.

    Output image will have the 10 LSBs filled in each 16b word, and the 6 MSBs
    will be set to zero.

    Args:
        img: A raw-10 image, as a uint8 numpy array.

    Returns:
        Image as a uint16 numpy array, with all row padding stripped.
    """
    if img.shape[1] % 5 != 0:
        raise its.error.Error('Invalid raw-10 buffer width')
    w = img.shape[1]*4/5
    h = img.shape[0]

    # Cut out the 4x8b MSBs and shift to bits [9:2] in 16b words.
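    # numpy.s_[4::5] selects every 5th byte of each row (the packed LSB bytes),
    # so deleting that slice keeps only the 4 MSB bytes of each pixel group;
    # left-shifting them by 2 makes room for the 2 LSBs merged back in below.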
    msbs = numpy.delete(img, numpy.s_[4::5], 1)
    msbs = msbs.astype(numpy.uint16)
    msbs = numpy.left_shift(msbs, 2)
    msbs = msbs.reshape(h,w)

    # Cut out the 4x2b LSBs and put each in bits [1:0] of their own 8b words.
    lsbs = img[::, 4::5].reshape(h,w/4)
    lsbs = numpy.right_shift(
            numpy.packbits(numpy.unpackbits(lsbs).reshape(h,w/4,4,2),3), 6)
    lsbs = lsbs.reshape(h,w)

    # Fuse the MSBs and LSBs back together.
    img16 = numpy.bitwise_or(msbs, lsbs).reshape(h,w)
    return img16

def convert_capture_to_planes(cap, props=None):
    """Convert a captured image object to separate image planes.

    Decompose an image into multiple images, corresponding to different planes.

    For YUV420 captures ("yuv"):
        Returns Y,U,V planes, where the Y plane is full-res and the U,V planes
        are each 1/2 x 1/2 of the full res.

    For Bayer captures ("raw" or "raw10"):
        Returns planes in the order R,Gr,Gb,B, regardless of the Bayer pattern
        layout. Each plane is 1/2 x 1/2 of the full res.

    For JPEG captures ("jpeg"):
        Returns R,G,B full-res planes.

    Args:
        cap: A capture object as returned by its.device.do_capture.
        props: (Optional) camera properties object (of static values);
            required for processing raw images.

    Returns:
        A tuple of float numpy arrays (one per plane), consisting of pixel
        values in the range [0.0, 1.0].
    """
    w = cap["width"]
    h = cap["height"]
    if cap["format"] == "raw10":
        assert(props is not None)
        cap = unpack_raw10_capture(cap, props)
    if cap["format"] == "yuv":
        y = cap["data"][0:w*h]
        u = cap["data"][w*h:w*h*5/4]
        v = cap["data"][w*h*5/4:w*h*6/4]
        return ((y.astype(numpy.float32) / 255.0).reshape(h, w, 1),
                (u.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1),
                (v.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1))
    elif cap["format"] == "jpeg":
        rgb = decompress_jpeg_to_rgb_image(cap["data"]).reshape(w*h*3)
        return (rgb[::3].reshape(h,w,1),
                rgb[1::3].reshape(h,w,1),
                rgb[2::3].reshape(h,w,1))
    elif cap["format"] == "raw":
        assert(props is not None)
        white_level = float(props['android.sensor.info.whiteLevel'])
        img = numpy.ndarray(shape=(h*w,), dtype='

def apply_lut_to_image(img, lut):
    """Applies a LUT to every pixel in a float image array.

    Internally converts to a 16b integer image, since the LUT can work with up
    to 16b->16b mappings (i.e. values in the range [0,65535]). The lut can also
    have fewer than 65536 entries, however it must be sized as a power of 2
    (and for smaller luts, the scale must match the bitdepth).

    For a 16b lut of 65536 entries, the operation performed is:

        lut[r * 65535] / 65535 -> r'
        lut[g * 65535] / 65535 -> g'
        lut[b * 65535] / 65535 -> b'

    For a 10b lut of 1024 entries, the operation becomes:

        lut[r * 1023] / 1023 -> r'
        lut[g * 1023] / 1023 -> g'
        lut[b * 1023] / 1023 -> b'

    Args:
        img: Numpy float image array, with pixel values in [0,1].
        lut: Numpy table encoding a LUT, mapping 16b integer values.

    Returns:
        Float image array after applying LUT to each pixel.
    """
    n = len(lut)
    if n <= 0 or n > MAX_LUT_SIZE or (n & (n - 1)) != 0:
        raise its.error.Error('Invalid arg LUT size: %d' % (n))
    m = float(n-1)
    return (lut[(img * m).astype(numpy.uint16)] / m).astype(numpy.float32)

def apply_matrix_to_image(img, mat):
    """Multiplies a 3x3 matrix with each float-3 image pixel.

    Each pixel is considered a column vector, and is left-multiplied by the
    given matrix:

        [     ]     r    r'
        [ mat ] *   g -> g'
        [     ]     b    b'

    Args:
        img: Numpy float image array, with pixel values in [0,1].
        mat: Numpy 3x3 matrix.

    Returns:
        The numpy float-3 image array resulting from the matrix mult.
    """
    h = img.shape[0]
    w = img.shape[1]
    img2 = numpy.empty([h, w, 3], dtype=numpy.float32)
    img2.reshape(w*h*3)[:] = (numpy.dot(img.reshape(h*w, 3), mat.T)
                             ).reshape(w*h*3)[:]
    return img2

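# Usage sketch (illustrative only; the variable names are hypothetical): a
# typical combination of the two helpers above is to color-correct a linear
# image and then gamma-encode it for viewing.
#
#   corrected = numpy.clip(apply_matrix_to_image(linear_img, ccm), 0.0, 1.0)
#   encoded = apply_lut_to_image(corrected, DEFAULT_GAMMA_LUT)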
def get_image_patch(img, xnorm, ynorm, wnorm, hnorm):
    """Get a patch (tile) of an image.

    Args:
        img: Numpy float image array, with pixel values in [0,1].
        xnorm,ynorm,wnorm,hnorm: Normalized (in [0,1]) coords for the tile.

    Returns:
        Float image array of the patch.
    """
    hfull = img.shape[0]
    wfull = img.shape[1]
    xtile = math.ceil(xnorm * wfull)
    ytile = math.ceil(ynorm * hfull)
    wtile = math.floor(wnorm * wfull)
    htile = math.floor(hnorm * hfull)
    return img[ytile:ytile+htile,xtile:xtile+wtile,:].copy()

def compute_image_means(img):
    """Calculate the mean of each color channel in the image.

    Args:
        img: Numpy float image array, with pixel values in [0,1].

    Returns:
        A list of mean values, one per color channel in the image.
    """
    means = []
    chans = img.shape[2]
    for i in xrange(chans):
        means.append(numpy.mean(img[:,:,i], dtype=numpy.float64))
    return means

def compute_image_variances(img):
    """Calculate the variance of each color channel in the image.

    Args:
        img: Numpy float image array, with pixel values in [0,1].

    Returns:
        A list of variance values, one per color channel in the image.
    """
    variances = []
    chans = img.shape[2]
    for i in xrange(chans):
        variances.append(numpy.var(img[:,:,i], dtype=numpy.float64))
    return variances

def write_image(img, fname, apply_gamma=False):
    """Save a float-3 numpy array image to a file.

    Supported formats: PNG, JPEG, and others; see PIL docs for more.

    Image can be 3-channel, which is interpreted as RGB, or can be 1-channel,
    which is greyscale.

    Can optionally specify that the image should be gamma-encoded prior to
    writing it out; this should be done if the image contains linear pixel
    values, to make the image look "normal".

    Args:
        img: Numpy image array data.
        fname: Path of file to save to; the extension specifies the format.
        apply_gamma: (Optional) apply gamma to the image prior to writing it.
    """
    if apply_gamma:
        img = apply_lut_to_image(img, DEFAULT_GAMMA_LUT)
    (h, w, chans) = img.shape
    if chans == 3:
        Image.fromarray((img * 255.0).astype(numpy.uint8), "RGB").save(fname)
    elif chans == 1:
        img3 = (img * 255.0).astype(numpy.uint8).repeat(3).reshape(h,w,3)
        Image.fromarray(img3, "RGB").save(fname)
    else:
        raise its.error.Error('Unsupported image type')

def downscale_image(img, f):
    """Shrink an image by a given integer factor.

    This function computes output pixel values by averaging over rectangular
    regions of the input image; it doesn't skip or sample pixels, and all
    input image pixels are evenly weighted.

    If the downscaling factor doesn't cleanly divide the width and/or height,
    then the remaining pixels on the right or bottom edge are discarded prior
    to the downscaling.

    Args:
        img: The input image as an ndarray.
        f: The downscaling factor, which should be an integer.

    Returns:
        The new (downscaled) image, as an ndarray.
    """
    h,w,chans = img.shape
    f = int(f)
    assert(f >= 1)
    h = (h/f)*f
    w = (w/f)*f
    img = img[0:h:,0:w:,::]
    chs = []
    for i in xrange(chans):
        ch = img.reshape(h*w*chans)[i::chans].reshape(h,w)
        ch = ch.reshape(h,w/f,f).mean(2).reshape(h,w/f)
        ch = ch.T.reshape(w/f,h/f,f).mean(2).T.reshape(h/f,w/f)
        chs.append(ch.reshape(h*w/(f*f)))
    img = numpy.vstack(chs).T.reshape(h/f,w/f,chans)
    return img

def __measure_color_checker_patch(img, xc,yc, patch_size):
    r = patch_size/2
    tile = img[yc-r:yc+r+1:, xc-r:xc+r+1:, ::]
    means = tile.mean(1).mean(0)
    return means

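# The patch centers computed by get_color_checker_chart_patches() below follow
# (written out illustratively, with hypothetical names):
#
#   patch_center(xi, yi) = origin_center + xi*vec_across + yi*vec_down
#
# where origin_center is the center of the chart's top-left patch and the two
# vectors are one-patch displacements measured from the chart's corner patches.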
def get_color_checker_chart_patches(img, debug_fname_prefix=None):
    """Return the center coords of each patch in a color checker chart.

    Assumptions:
        * Chart is vertical or horizontal w.r.t. camera, but not diagonal.
        * Chart is (roughly) planar-parallel to the camera.
        * Chart is centered in frame (roughly).
        * Around/behind chart is white/grey background.
        * The only black pixels in the image are from the chart.
        * Chart is 100% visible and contained within image.
        * No other objects within image.
        * Image is well-exposed.
        * Standard color checker chart with standard-sized black borders.

    The values returned are in the coordinate system of the chart; that is,
    the "origin" patch is the brown patch that is in the chart's top-left
    corner when it is in the normal upright/horizontal orientation. (The chart
    may be any of the four main orientations in the image.)

    The chart is 6x4 patches in the normal upright orientation.

    Internally, the function locates the center coordinate of the top-left
    patch and the displacement vectors to the next patches to the right and
    below it; from these pieces of data, the center coordinates of all of the
    patches are computed.

    Args:
        img: Input image, as a numpy array with pixels in [0,1].
        debug_fname_prefix: If not None, the (string) name of a file prefix to
            use to save a number of debug images for visualizing the output of
            this function; can be used to see if the patches are being found
            successfully.

    Returns:
        6x4 list of lists of integer (x,y) coords of the center of each patch,
        ordered in the "chart order" (6x4 row major).
    """

    # Shrink the original image.
    DOWNSCALE_FACTOR = 4
    img_small = downscale_image(img, DOWNSCALE_FACTOR)

    # Make a threshold image, which is 1.0 where the image is black,
    # and 0.0 elsewhere.
    BLACK_PIXEL_THRESH = 0.2
    mask_img = scipy.stats.threshold(
            img_small.max(2), BLACK_PIXEL_THRESH, 1.1, 0.0)
    mask_img = 1.0 - scipy.stats.threshold(mask_img, -0.1, 0.1, 1.0)

    if debug_fname_prefix is not None:
        h,w = mask_img.shape
        write_image(img, debug_fname_prefix+"_0.jpg")
        write_image(mask_img.repeat(3).reshape(h,w,3),
                    debug_fname_prefix+"_1.jpg")

    # Mask image flattened to a single row or column (by averaging).
    # Also apply a threshold to these arrays.
    FLAT_PIXEL_THRESH = 0.05
    flat_row = mask_img.mean(0)
    flat_col = mask_img.mean(1)
    flat_row = [0 if v < FLAT_PIXEL_THRESH else 1 for v in flat_row]
    flat_col = [0 if v < FLAT_PIXEL_THRESH else 1 for v in flat_col]

    # Start and end of the non-zero region of the flattened row/column.
    flat_row_nonzero = [i for i in range(len(flat_row)) if flat_row[i]>0]
    flat_col_nonzero = [i for i in range(len(flat_col)) if flat_col[i]>0]
    flat_row_min, flat_row_max = min(flat_row_nonzero), max(flat_row_nonzero)
    flat_col_min, flat_col_max = min(flat_col_nonzero), max(flat_col_nonzero)

    # Orientation of chart, and number of grid cells horz. and vertically.
    orient = "h" if flat_row_max-flat_row_min>flat_col_max-flat_col_min else "v"
    xgrids = 6 if orient=="h" else 4
    ygrids = 6 if orient=="v" else 4

    # Get better bounds on the patches region, lopping off some of the excess
    # black border.
    HRZ_BORDER_PAD_FRAC = 0.0138
    VERT_BORDER_PAD_FRAC = 0.0395
    xpad = HRZ_BORDER_PAD_FRAC if orient=="h" else VERT_BORDER_PAD_FRAC
    ypad = HRZ_BORDER_PAD_FRAC if orient=="v" else VERT_BORDER_PAD_FRAC
    xchart = flat_row_min + (flat_row_max - flat_row_min) * xpad
    ychart = flat_col_min + (flat_col_max - flat_col_min) * ypad
    wchart = (flat_row_max - flat_row_min) * (1 - 2*xpad)
    hchart = (flat_col_max - flat_col_min) * (1 - 2*ypad)

    # Get the colors of the 4 corner patches, in clockwise order, by measuring
    # the average value of a small patch at each of the 4 patch centers.
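    # The corner grid coordinates visited are (0,0), (xgrids-1,0),
    # (xgrids-1,ygrids-1), (0,ygrids-1), i.e. top-left, top-right,
    # bottom-right, bottom-left in image coordinates.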
    colors = []
    centers = []
    for (x,y) in [(0,0), (xgrids-1,0), (xgrids-1,ygrids-1), (0,ygrids-1)]:
        xc = xchart + (x + 0.5)*wchart/xgrids
        yc = ychart + (y + 0.5)*hchart/ygrids
        xc = int(xc * DOWNSCALE_FACTOR + 0.5)
        yc = int(yc * DOWNSCALE_FACTOR + 0.5)
        centers.append((xc,yc))
        chan_means = __measure_color_checker_patch(img, xc,yc, 32)
        colors.append(sum(chan_means) / len(chan_means))

    # The brightest corner is the white patch, the darkest is the black patch.
    # The black patch should be counter-clockwise from the white patch.
    white_patch_index = None
    for i in range(4):
        if colors[i] == max(colors) and \
                colors[(i-1+4)%4] == min(colors):
            white_patch_index = i%4
    assert(white_patch_index is not None)

    # Return the coords of the origin (top-left when the chart is in the normal
    # upright orientation) patch's center, and the vector displacement to the
    # center of the second patch on the first row of the chart (when in the
    # normal upright orientation).
    origin_index = (white_patch_index+1)%4
    prev_index = (origin_index-1+4)%4
    next_index = (origin_index+1)%4
    origin_center = centers[origin_index]
    prev_center = centers[prev_index]
    next_center = centers[next_index]
    vec_across = tuple([(next_center[i]-origin_center[i])/5.0 for i in [0,1]])
    vec_down = tuple([(prev_center[i]-origin_center[i])/3.0 for i in [0,1]])

    # Compute the center of each patch.
    patches = [[],[],[],[]]
    for yi in range(4):
        for xi in range(6):
            x0,y0 = origin_center
            dxh,dyh = vec_across
            dxv,dyv = vec_down
            xc = int(x0 + dxh*xi + dxv*yi)
            yc = int(y0 + dyh*xi + dyv*yi)
            patches[yi].append((xc,yc))

    # Sanity check: test that the R,G,B,black,white patches are correct.
    patch_info = [(2,2,[0]),       # Red
                  (2,1,[1]),       # Green
                  (2,0,[2]),       # Blue
                  (3,0,[0,1,2]),   # White
                  (3,5,[])]        # Black
    for i in range(len(patch_info)):
        yi,xi,high_chans = patch_info[i]
        low_chans = [i for i in [0,1,2] if i not in high_chans]
        xc,yc = patches[yi][xi]
        means = __measure_color_checker_patch(img, xc,yc, 64)
        if (min([means[i] for i in high_chans]+[1]) < \
                max([means[i] for i in low_chans]+[0])):
            print "Color patch sanity check failed: patch", i
            # If the debug info is requested, then don't assert that the
            # patches are matched, to allow the caller to see the output.
            if debug_fname_prefix is None:
                assert(0)

    if debug_fname_prefix is not None:
        for (xc,yc) in sum(patches,[]):
            img[yc,xc] = 1.0
        write_image(img, debug_fname_prefix+"_2.jpg")

    return patches

class __UnitTest(unittest.TestCase):
    """Run a suite of unit tests on this module.
    """

    # TODO: Add more unit tests.

    def test_apply_matrix_to_image(self):
        """Unit test for apply_matrix_to_image.

        Test by using a canned set of values on a 1x1 pixel image.

            [ 1 2 3 ]   [ 0.1 ]   [ 1.4 ]
            [ 4 5 6 ] * [ 0.2 ] = [ 3.2 ]
            [ 7 8 9 ]   [ 0.3 ]   [ 5.0 ]
               mat         x         y
        """
        mat = numpy.array([[1,2,3],[4,5,6],[7,8,9]])
        x = numpy.array([0.1,0.2,0.3]).reshape(1,1,3)
        y = apply_matrix_to_image(x, mat).reshape(3).tolist()
        y_ref = [1.4,3.2,5.0]
        passed = all([math.fabs(y[i] - y_ref[i]) < 0.001 for i in xrange(3)])
        self.assertTrue(passed)

    def test_apply_lut_to_image(self):
        """Unit test for apply_lut_to_image.

        Test by using a canned set of values on a 1x1 pixel image. The LUT will
        simply double the value of the index:

            lut[x] = 2*x
        """
        lut = numpy.array([2*i for i in xrange(65536)])
        x = numpy.array([0.1,0.2,0.3]).reshape(1,1,3)
        y = apply_lut_to_image(x, lut).reshape(3).tolist()
        y_ref = [0.2,0.4,0.6]
        passed = all([math.fabs(y[i] - y_ref[i]) < 0.001 for i in xrange(3)])
        self.assertTrue(passed)

if __name__ == '__main__':
    unittest.main()
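
# A possible additional unit test, sketched here for the "TODO" above. It is
# not part of the original module, and it would need to live inside __UnitTest
# (above the __main__ guard) to be picked up by unittest.main():
#
#    def test_downscale_image(self):
#        """Downscaling a 2x2 image by 2 should average the four pixels."""
#        x = numpy.array([0.1,0.2,0.3,0.4]).repeat(3).reshape(2,2,3)
#        y = downscale_image(x, 2).reshape(3).tolist()
#        passed = all([math.fabs(v - 0.25) < 0.001 for v in y])
#        self.assertTrue(passed)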