Keras-image

科技2026-04-08 15

# Cheat sheet of OpenCV / imutils / Keras image snippets, grouped by topic.
#
# NOTE(review): the snippets look like they were collected from several
# scripts, so a number of names used below are never defined in this file
# (canvas, output/x/y/w/h in the first rectangle example, dim, chan, chans,
# edged, coins, c, digit, model, detector, counts, url, preprocess,
# AspectAwarePreprocessor, ImageToArrayPreprocessor, SimpleDatasetLoader) and
# only the "image" key of ``args`` is declared on the parser. Supply them
# before running a given section.

# import the necessary packages
# (np, os, time, imutils, requests, paths and contours are used below but
# were missing from the original import list)
import argparse
import os
import time

import cv2
import imutils
import numpy as np
import requests
from imutils import contours
from imutils import paths
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img

############
#image basic
############
# Construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

# im operations
# Load the image and show some basic information on it
image = cv2.imread(args["image"])
cv2.imshow("Image", image)  # Show the image
cv2.imwrite("newimage.jpg", image)  # Save the image
cv2.waitKey(0)  # wait for a keypress

# Crop
corner = image[0:100, 0:100]

# Draw rectangle
cv2.rectangle(canvas, (200, 50), (225, 125), (255, 0, 0), -1)
cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4), (0, 255, 0), 1)

# Convert
# Each colour space gets its own name so that ``gray`` really is the
# grayscale image when it is reused further down (e.g. in the Pad example).
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # grayscale
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # HSV (Hue, Saturation, Value) color spaces
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)  # L*a*b* color spaces

# Flip
flipped = cv2.flip(image, 1)  # horizontally
flipped = cv2.flip(image, 0)  # vertically
flipped = cv2.flip(image, -1)  # along both axes

# Resize
resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
# we can specify our target width or height
resized = imutils.resize(image, width=100, height=100, inter=cv2.INTER_AREA)

# Rotate
rotated = imutils.rotate(image, 180)

# Split and Merge
(B, G, R) = cv2.split(image)
merged = cv2.merge([B, G, R])

# Translate
shifted = imutils.translate(image, 0, 100)

# color histogram
hist = cv2.calcHist([chan], [0], None, [256], [0, 256])  # 1D color histogram
hist = cv2.calcHist([chans[1], chans[0]], [0, 1], None,
                    [32, 32], [0, 256, 0, 256])  # 2D color histogram

# Apply histogram equalization to stretch the contrast of our image
eq = cv2.equalizeHist(image)

# Blur
blurred = cv2.blur(image, (3, 3))  # Averaged
blurred = cv2.GaussianBlur(image, (3, 3), 0)  # GaussianBlur
blurred = cv2.medianBlur(image, 3)  # medianBlur
blurred = cv2.bilateralFilter(image, 5, 21, 21)  # bilateralFilter

# Simple Threshold
(T, threshInv) = cv2.threshold(blurred, 155, 255, cv2.THRESH_BINARY_INV)
(T, thresh) = cv2.threshold(blurred, 155, 255, cv2.THRESH_BINARY)

# Adaptive Threshold
image = cv2.imread(args["image"])
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(image, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                               cv2.THRESH_BINARY_INV, 11, 4)  # Mean Thresh
thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 15, 3)  # Gaussian Thresh

#Canny
image = cv2.imread(args["image"])
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.GaussianBlur(image, (5, 5), 0)
canny = cv2.Canny(image, 30, 150)

# Compute the Laplacian of the image
lap = cv2.Laplacian(image, cv2.CV_64F)
lap = np.uint8(np.absolute(lap))

# Compute gradients along the X and Y axis, respectively
sobelX = cv2.Sobel(image, cv2.CV_64F, 1, 0)
sobelY = cv2.Sobel(image, cv2.CV_64F, 0, 1)

# Pad
gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

# Find the Right Contours
# cv2.findContours returns a different tuple depending on the OpenCV major
# version; imutils.grab_contours picks the contour list out of whichever
# tuple comes back instead of hard-coding a 3-element unpack.
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cv2.drawContours(coins, cnts, -1, (0, 255, 0), 2)
(x, y, w, h) = cv2.boundingRect(c)
((centerX, centerY), radius) = cv2.minEnclosingCircle(c)

# Text
cv2.putText(image, str(digit), (x - 10, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)


##############
#image imutils
##############
def translate(image, x, y):
    """Return ``image`` shifted by the offsets ``x`` and ``y``.

    The output has the same width and height as the input.
    """
    # Define the translation matrix and perform the translation
    M = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

    # Return the translated image
    return shifted


def rotate(image, angle, center=None, scale=1.0):
    """Return ``image`` rotated by ``angle`` around ``center``.

    ``center`` defaults to the middle of the image, ``scale`` is passed
    through to ``cv2.getRotationMatrix2D``, and the output keeps the
    input's width and height.
    """
    # Grab the dimensions of the image
    (h, w) = image.shape[:2]

    # If the center is None, initialize it as the center of the image
    if center is None:
        center = (w / 2, h / 2)

    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))

    # Return the rotated image
    return rotated


def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Returns the original image unchanged when neither ``width`` nor
    ``height`` is given. When both are given, ``width`` wins and
    ``height`` is ignored. ``inter`` is the interpolation flag handed to
    ``cv2.resize``.
    """
    # grab the image size
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the original image
    if width is None and height is None:
        return image

    if width is None:
        # only the height was given: scale the width by the height ratio
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # a width was given (any height is ignored): scale the height by
        # the width ratio
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image and return it
    resized = cv2.resize(image, dim, interpolation=inter)
    return resized


###################
#image augmentation
###################
print("[INFO] loading example image...")
image = load_img(args["image"])
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.2,
                         zoom_range=0.2, horizontal_flip=True,
                         fill_mode="nearest")
total = 0
print("[INFO] generating images...")
imageGen = aug.flow(image, batch_size=1, save_to_dir=args["output"],
                    save_prefix=args["prefix"], save_format="jpg")
# the loop is ended by hand after ten batches
for image in imageGen:
    total += 1
    if total == 10:
        break

####################
#image preprocessing
####################
imagePaths = list(paths.list_images(args["dataset"]))
# the class name is the directory that directly contains each image
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]
aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)

#################
#image processing
#################
# loop over the input images
for imagePath in sorted(paths.list_images(args["dataset"])):
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # NOTE(review): the original joined the next two calls with a bare "/",
    # which is not valid Python; they are read here as alternatives --
    # confirm which one is wanted.
    # image = imutils.resize(image, width=28)
    image = preprocess(image, 28, 28)
    image = img_to_array(image)
    data.append(image)

    # extract the class label from the image path and update the labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# randomly sample a few of the input images
imagePaths = list(paths.list_images(args["input"]))
imagePaths = np.random.choice(imagePaths, size=(10,), replace=False)

##############
#image testing
##############
# loop over the image paths
for imagePath in imagePaths:
    # load the image and convert it to grayscale, then pad the image
    # to ensure digits caught only the border of the image are
    # retained
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

    # threshold the image to reveal the digits
    thresh = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # find contours in the image, keeping only the four largest ones,
    # then sort them from left to right.
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    # grab_contours copes with the 2-tuple returned by OpenCV 2/4 and the
    # 3-tuple returned by OpenCV 3 (the old is_cv2() test mis-indexed on 4)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
    cnts = contours.sort_contours(cnts)[0]

    # initialize the output image as "grayscale" image with 3 channels along
    # with the output predictions
    output = cv2.merge([gray] * 3)
    predictions = []

    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        # clamp the start indices: a negative start would wrap around and
        # yield an empty or wrong ROI for boxes within 5 px of the edge
        roi = gray[max(y - 5, 0):y + h + 5, max(x - 5, 0):x + w + 5]
        roi = preprocess(roi, 28, 28)
        roi = np.expand_dims(img_to_array(roi), axis=0)
        # NOTE(review): the + 1 presumably maps class index 0 to digit 1 --
        # confirm against the label encoding used at training time
        pred = model.predict(roi).argmax(axis=1)[0] + 1
        predictions.append(str(pred))

        # draw the prediction on the output image
        cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4),
                      (0, 255, 0), 1)
        cv2.putText(output, str(pred), (x - 5, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

    # show the output image
    print("[INFO] captcha: {}".format("".join(predictions)))
    cv2.imshow("Output", output)
    cv2.waitKey()

#################
#image annotating
#################
for (i, imagePath) in enumerate(imagePaths):
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))

    try:
        # load image
        image = cv2.imread(imagePath)
        # convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # add padding
        gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)
        # threshold to black/white
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

        # find contours (version-independent, see grab_contours above)
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]

        # loop over each contour and extract the image
        for c in cnts:
            # compute bounding box for the contour; clamp the start indices
            # so a box near the edge cannot wrap around via negative indexing
            (x, y, w, h) = cv2.boundingRect(c)
            roi = gray[max(y - 5, 0):y + h + 5, max(x - 5, 0):x + w + 5]

            # display the character, making it large enough for us to see,
            # then wait for keypress
            cv2.imshow("ROI", imutils.resize(roi, width=28))
            key = cv2.waitKey(0)

            # the backtick key skips the current character
            if key == ord("`"):
                print("[INFO] ignoring character")
                continue

            # grab the key that was pressed and construct path to output
            # directory
            key = chr(key).upper()
            dirPath = os.path.sep.join([args["annot"], key])

            # if output directory does not exist, create it
            if not os.path.exists(dirPath):
                os.makedirs(dirPath)

            # write the labeled character to file
            count = counts.get(key, 1)
            p = os.path.sep.join([dirPath, "{}.png".format(str(count).zfill(6))])
            cv2.imwrite(p, roi)
            counts[key] = count + 1

    except KeyboardInterrupt:
        print("[INFO] manually leaving script")
        break

    # unknown error: skip this image but let SystemExit and friends
    # propagate (the original bare ``except:`` swallowed those too)
    except Exception:
        print("[INFO] skipping image...")

##################
#image downloading
##################
for _ in range(0, args["num_images"]):
    try:
        # fetch the image; raise on HTTP error statuses so that an error
        # page is not saved to disk as a ".jpg"
        r = requests.get(url, timeout=60)
        r.raise_for_status()

        # save the image to disk; the context manager closes the file even
        # if the write fails
        p = os.path.join(args["output"], "{}.jpg".format(str(total).zfill(5)))
        with open(p, "wb") as f:
            f.write(r.content)

        # update counter
        print("[INFO] downloaded: {}".format(p))
        total += 1

    # network / HTTP failures and file-system failures are best-effort
    except (requests.RequestException, OSError):
        print("[INFO] error downloading image...")

    # sleep
    time.sleep(0.1)

#############
#video stream
#############
# if no video was supplied, use the webcam
if not args.get("video", False):
    camera = cv2.VideoCapture(0)
# otherwise load the video
else:
    camera = cv2.VideoCapture(args["video"])

try:
    while True:
        # grab the current frame
        (grabbed, frame) = camera.read()

        # if we did not grab a frame then we have reached the end of the
        # video, or the webcam read failed; stop either way rather than
        # handing a missing frame to the resize below
        if not grabbed:
            break

        # resize, convert to grayscale, and then clone it (so we can
        # annotate it)
        frame = imutils.resize(frame, width=300)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frameClone = frame.copy()

        # detect faces in the input frame
        rects = detector.detectMultiScale(gray, scaleFactor=1.1,
                                          minNeighbors=5, minSize=(30, 30),
                                          flags=cv2.CASCADE_SCALE_IMAGE)

        # loop over the detected bounding boxes
        for (fX, fY, fW, fH) in rects:
            # extract the ROI of the face from the grayscale image, resize
            # it to 28x28, and then prepare the ROI for classification
            roi = gray[fY:fY + fH, fX:fX + fW]
            roi = cv2.resize(roi, (28, 28))
            roi = roi.astype("float") / 255.0
            roi = img_to_array(roi)
            roi = np.expand_dims(roi, axis=0)

            (notSmiling, smiling) = model.predict(roi)[0]
            label = "Smiling" if smiling > notSmiling else "Not Smiling"

            cv2.putText(frameClone, label, (fX, fY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
            cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                          (0, 0, 255), 2)

        cv2.imshow("Face", frameClone)

        # pressing "q" stops the loop
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # clean up, even when the loop exits through an exception
    camera.release()
    cv2.destroyAllWindows()

Processed: 0.015, SQL: 9