如何利用OpenCV4.9进行离散傅里叶变换

返回：OpenCV系列文章目录（持续更新中......）

上一篇:如何利用OpenCV4.9 更改图像的对比度和亮度

下一篇:OpenCV 如何使用 XML 和 YAML 文件的文件输入和输出

目标

我们将寻求以下问题的答案：

什么是傅里叶变换，为什么要使用它？
如何在 OpenCV 中做到这一点？
使用以下函数： copyMakeBorder() , merge() , dft() , getOptimalDFTSize() , log()和normalize() .

源代码

你可以在官方网站下载相关源代码。

以下 dft() 的用法示例:

C++

#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp" 
#include <iostream> 
using namespace cv;
using namespace std; 
static void help(char ** argv)
{
    // Print a short description of the demo followed by its command-line usage.
    // argv[0] is echoed as the executable name in the usage line.
    const char* const programName = argv[0];

    std::cout << std::endl;
    std::cout << "This program demonstrated the use of the discrete Fourier transform (DFT). " << std::endl;
    std::cout << "The dft of an image is taken and it's power spectrum is displayed." << std::endl << std::endl;
    std::cout << "Usage:" << std::endl;
    std::cout << programName << " [image_name -- default lena.jpg]" << std::endl << std::endl;
}
int main(int argc, char ** argv)
{
 help(argv); 
 const char* filename = argc >=2 ? argv[1] : "lena.jpg"; 
 Mat I = imread( samples::findFile( filename ), IMREAD_GRAYSCALE);
 if( I.empty()){
 cout << "Error opening image" << endl;
 return EXIT_FAILURE;
 } 
 Mat padded; //expand input image to optimal size
 int m = getOptimalDFTSize( I.rows );
 int n = getOptimalDFTSize( I.cols ); // on the border add zero values
 copyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0)); 
 Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
 Mat complexI;
 merge(planes, 2, complexI); // Add to the expanded another plane with zeros 
 dft(complexI, complexI); // this way the result may fit in the source matrix 
 // compute the magnitude and switch to logarithmic scale
 // => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))
 split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
 magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitude
 Mat magI = planes[0]; 
 magI += Scalar::all(1); // switch to logarithmic scale
 log(magI, magI); 
 // crop the spectrum, if it has an odd number of rows or columns
 magI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2)); 
 // rearrange the quadrants of Fourier image so that the origin is at the image center
 int cx = magI.cols/2;
 int cy = magI.rows/2; 
 Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
 Mat q1(magI, Rect(cx, 0, cx, cy)); // Top-Right
 Mat q2(magI, Rect(0, cy, cx, cy)); // Bottom-Left
 Mat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right 
 Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
 q0.copyTo(tmp);
 q3.copyTo(q0);
 tmp.copyTo(q3); 
 q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
 q2.copyTo(q1);
 tmp.copyTo(q2); 
 normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a
 // viewable image form (float between values 0 and 1). 
 imshow("Input Image" , I ); // Show the result
 imshow("spectrum magnitude", magI);
 waitKey(); 
 return EXIT_SUCCESS;
}

Java

import org.opencv.core.*;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs; 
import java.util.List;
import java.util.*; 
/**
 * Demo that computes the discrete Fourier transform of a grayscale image and
 * shows the centered, log-scaled magnitude spectrum next to the input image.
 */
class DiscreteFourierTransformRun{
    /** Prints a short description of the demo and its command-line usage. */
    private void help() {
        System.out.println("" +
                "This program demonstrated the use of the discrete Fourier transform (DFT). \n" +
                "The dft of an image is taken and it's power spectrum is displayed.\n" +
                "Usage:\n" +
                "./DiscreteFourierTransform [image_name -- default ../data/lena.jpg]");
    }

    /**
     * Loads the image, computes its DFT and displays the magnitude spectrum.
     *
     * <p>This method terminates the JVM itself: with status -1 when the image
     * cannot be read, and with status 0 after {@code HighGui.waitKey()} returns.
     *
     * @param args command-line arguments; {@code args[0]}, when present, is the
     *             image path (default {@code ../data/lena.jpg})
     */
    public void run(String[] args){

        help();

        String filename = ((args.length > 0) ? args[0] : "../data/lena.jpg");

        Mat I = Imgcodecs.imread(filename, Imgcodecs.IMREAD_GRAYSCALE);
        if( I.empty() ) {
            System.out.println("Error opening image");
            System.exit(-1);
        }

        // Expand the input image to the optimal DFT size; the added border is zero-filled.
        Mat padded = new Mat();
        int m = Core.getOptimalDFTSize( I.rows() );
        int n = Core.getOptimalDFTSize( I.cols() );
        Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));

        // Build the two-channel input: plane 0 = image as float, plane 1 = zeros.
        List<Mat> planes = new ArrayList<Mat>();
        padded.convertTo(padded, CvType.CV_32F);
        planes.add(padded);
        planes.add(Mat.zeros(padded.size(), CvType.CV_32F));
        Mat complexI = new Mat();
        Core.merge(planes, complexI);

        // Source and destination are the same matrix.
        Core.dft(complexI, complexI);

        // Compute the magnitude and switch to logarithmic scale
        // => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))
        Core.split(complexI, planes);                               // planes.get(0) = Re(DFT(I))
                                                                    // planes.get(1) = Im(DFT(I))
        Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitude
        Mat magI = planes.get(0);

        Mat matOfOnes = Mat.ones(magI.size(), magI.type());
        Core.add(matOfOnes, magI, magI);
        Core.log(magI, magI);

        // Crop the spectrum if it has an odd number of rows or columns
        // ("& -2" clears the lowest bit).
        magI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));

        // Rearrange the quadrants of the Fourier image so that the origin is at the image center.
        int cx = magI.cols()/2;
        int cy = magI.rows()/2;

        Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy));   // Top-Left - one ROI per quadrant
        Mat q1 = new Mat(magI, new Rect(cx, 0, cx, cy));  // Top-Right
        Mat q2 = new Mat(magI, new Rect(0, cy, cx, cy));  // Bottom-Left
        Mat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right

        Mat tmp = new Mat();               // swap quadrants (Top-Left with Bottom-Right)
        q0.copyTo(tmp);
        q3.copyTo(q0);
        tmp.copyTo(q3);

        q1.copyTo(tmp);                    // swap quadrants (Top-Right with Bottom-Left)
        q2.copyTo(q1);
        tmp.copyTo(q2);

        // Min-max scale the float log-magnitudes to 0..255 and convert to 8-bit
        // in a single step. The data must stay floating point until here:
        // converting to CV_8UC1 first would round the narrow range of log values
        // to a few integers and leave visible banding in the displayed spectrum.
        Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1);

        HighGui.imshow("Input Image"       , I   );    // Show the result
        HighGui.imshow("Spectrum Magnitude", magI);
        HighGui.waitKey();

        System.exit(0);
    }
}
/** Command-line launcher for the discrete Fourier transform demo. */
public class DiscreteFourierTransform {
    /**
     * Loads the OpenCV native library and hands the arguments to the demo.
     *
     * @param args command-line arguments, passed through unchanged to
     *             {@code DiscreteFourierTransformRun.run}
     */
    public static void main(String[] args) {
        // Load the native library first; run() below calls into OpenCV.
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

        DiscreteFourierTransformRun demo = new DiscreteFourierTransformRun();
        demo.run(args);
    }
}

Python

from __future__ import print_function
import sys 
import cv2 as cv
import numpy as np 
def print_help():
    """Print the demo description and its command-line usage to stdout."""
    # The first (empty) entry produces the leading blank line of the message.
    help_lines = (
        '',
        ' This program demonstrated the use of the discrete Fourier transform (DFT).',
        " The dft of an image is taken and it's power spectrum is displayed.",
        ' Usage:',
        ' discrete_fourier_transform.py [image_name -- default lena.jpg]',
    )
    print('\n'.join(help_lines))
def main(argv):
    """Display the centered, log-scaled DFT magnitude spectrum of a grayscale image.

    Args:
        argv: command-line arguments without the program name; argv[0], when
            present, is the image file name (default 'lena.jpg').

    Returns:
        -1 if the image could not be read; otherwise None, after cv.waitKey()
        has returned.
    """
    print_help()

    filename = argv[0] if len(argv) > 0 else 'lena.jpg'

    I = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
    if I is None:
        print('Error opening image')
        return -1

    # Expand the input image to the optimal DFT size; the added border is zero-filled.
    rows, cols = I.shape
    m = cv.getOptimalDFTSize(rows)
    n = cv.getOptimalDFTSize(cols)
    padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])

    # Two float32 planes (image + zeros) merged into one two-channel array
    # that serves as the complex input of cv.dft().
    planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
    complexI = cv.merge(planes)

    # Use the returned arrays instead of relying on OpenCV writing into the
    # Python objects passed as output arguments.
    complexI = cv.dft(complexI)
    real_part, imag_part = cv.split(complexI)     # Re(DFT(I)), Im(DFT(I))

    # Magnitude on a logarithmic scale: log(1 + sqrt(Re^2 + Im^2))
    magI = cv.magnitude(real_part, imag_part)
    magI += 1
    magI = cv.log(magI)

    # Crop the spectrum if it has an odd number of rows or columns
    # ("& -2" clears the lowest bit).
    magI_rows, magI_cols = magI.shape
    magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]

    # Rearrange the quadrants so that the origin ends up at the image center.
    # NumPy indexing is [row, column], so the first slice selects top/bottom.
    half_rows = magI.shape[0] // 2
    half_cols = magI.shape[1] // 2
    top_left = magI[0:half_rows, 0:half_cols]
    bottom_left = magI[half_rows:2 * half_rows, 0:half_cols]
    top_right = magI[0:half_rows, half_cols:2 * half_cols]
    bottom_right = magI[half_rows:2 * half_rows, half_cols:2 * half_cols]

    # The quadrants are views into magI, so assigning through them edits magI.
    tmp = np.copy(top_left)            # swap Top-Left with Bottom-Right
    top_left[:] = bottom_right
    bottom_right[:] = tmp

    tmp = np.copy(bottom_left)         # swap Bottom-Left with Top-Right
    bottom_left[:] = top_right
    top_right[:] = tmp

    # Transform the matrix with float values into a viewable image form
    # (float between values 0 and 1).
    magI = cv.normalize(magI, None, 0, 1, cv.NORM_MINMAX)

    cv.imshow("Input Image", I)        # Show the result
    cv.imshow("spectrum magnitude", magI)
    cv.waitKey()
if __name__ == "__main__":
    # Pass main()'s result to sys.exit() so that the -1 it returns when the
    # image cannot be opened becomes a non-zero process exit status; a None
    # result exits with status 0.
    sys.exit(main(sys.argv[1:]))

解释：

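傅里叶变换会将图像分解为正弦和余弦分量。换句话说，它将图像从空间域转换到频域。其基本思想是：任何函数都可以用无穷多个正弦和余弦函数之和来精确逼近，而傅里叶变换正是实现这一点的方法。从数学上讲，二维图像的傅里叶变换为：

$$F(k,l) = \sum_{i=0}^{N-1}\sum_{j=0}^{N-1} f(i,j)\, e^{-i 2\pi\left(\frac{ki}{N} + \frac{lj}{N}\right)}$$

$$e^{ix} = \cos x + i\sin x$$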

这里 f 是空间域中的图像值，F 是频域中的图像值。变换的结果是复数，可以用实部图像和虚部图像来表示，也可以用幅度图像和相位图像来表示。不过在图像处理算法中，我们通常只关心幅度图像，因为它包含了我们所需的关于图像几何结构的全部信息。但是，如果您打算在这些形式下对图像做一些修改，之后再变换回来，则需要同时保留这两者。

在此示例中，我将演示如何计算和显示傅里叶变换的幅度图像。数字图像是离散的，也就是说，像素只能取给定值域中的某个值。例如，在基本灰度图像中，像素值通常介于 0 到 255 之间。因此，傅里叶变换也需要是离散类型的，这就是离散傅里叶变换（DFT）。每当您需要从几何角度确定图像的结构时，都可以使用它。以下是要遵循的步骤（假设输入为灰度图像 I）：

将图像扩展到最佳大小

DFT 的性能取决于图像大小。对于数字 2、3 和 5 的倍数的图像大小，它往往是最快的。因此，为了实现最佳性能，通常最好将边框值填充到图像上，以获得具有此类特征的大小。getOptimalDFTSize()返回这个最佳大小，我们可以使用 copyMakeBorder() 函数来扩展图像的边框（附加的像素以零初始化）：

c++:

 Mat padded; //expand input image to optimal size
 int m = getOptimalDFTSize( I.rows );
 int n = getOptimalDFTSize( I.cols ); // on the border add zero values
 copyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0));

Java:

 Mat padded = new Mat(); //expand input image to optimal size
 int m = Core.getOptimalDFTSize( I.rows() );
 int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero values
 Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));

python:

 rows, cols = I.shape
 m = cv.getOptimalDFTSize( rows )
 n = cv.getOptimalDFTSize( cols )
 padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])

为复数值和实数值腾出空间

傅里叶变换的结果是复数。这意味着对于每个图像值，结果是两个图像值（每个分量一个）。此外，频域的取值范围比空间域大得多。因此，我们通常至少以浮点格式存储这些值。为此，我们将输入图像转换为浮点类型，并用另一个通道对其进行扩展，以保存复数值：

c++:

 Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
 Mat complexI;
 merge(planes, 2, complexI); // Add to the expanded another plane with zeros

Java:

 List<Mat> planes = new ArrayList<Mat>();
 padded.convertTo(padded, CvType.CV_32F); // store the padded image as float
 planes.add(padded); // first plane: the padded image
 planes.add(Mat.zeros(padded.size(), CvType.CV_32F)); // second plane: all zeros
 Mat complexI = new Mat();
 Core.merge(planes, complexI); // Add to the expanded another plane with zeros

Python:

 # first plane: the padded image as float32; second plane: all zeros
 planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
 complexI = cv.merge(planes) # Add to the expanded another plane with zeros

进行离散傅里叶变换

可以就地计算（输入与输出相同）：

C++:

dft(complexI, complexI); // this way the result may fit in the source matrix

Java:

 Core.dft(complexI, complexI); // this way the result may fit in the source matrix

Python:

 cv.dft(complexI, complexI) # this way the result may fit in the source matrix

将实部和虚部转换为幅度

复数有实部（Re）和虚部（Im）两个部分。DFT 的结果是复数。DFT 的幅度为：

$$M = \sqrt{Re(DFT(I))^2 + Im(DFT(I))^2}$$

翻译成 OpenCV 代码：

C++:

 split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
 magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitude
 Mat magI = planes[0];

Java:

 Core.split(complexI, planes); // planes.get(0) = Re(DFT(I)
 // planes.get(1) = Im(DFT(I))
 Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitude
 Mat magI = planes.get(0);

Python:

 cv.split(complexI, planes) # planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
 cv.magnitude(planes[0], planes[1], planes[0])# planes[0] = magnitude
 magI = planes[0]

切换到对数刻度

事实证明，傅里叶系数的动态范围太大，无法直接显示在屏幕上。其中既有很小的值，也有很大的值，直接显示时无法观察到它们的变化：大的值全部变成白点，小的值全部变成黑点。为了能用灰度值进行可视化，我们可以将线性刻度转换为对数刻度：

$$M_1 = \log(1 + M)$$

翻译成 OpenCV 代码：

C++:

 magI += Scalar::all(1); // switch to logarithmic scale
 log(magI, magI);

Java:

 Mat matOfOnes = Mat.ones(magI.size(), magI.type());
 Core.add(matOfOnes, magI, magI); // switch to logarithmic scale
 Core.log(magI, magI);

Python :

 matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
 cv.add(matOfOnes, magI, magI) # switch to logarithmic scale
 cv.log(magI, magI)

裁剪和重新排列

还记得吗？在第一步中我们扩展了图像。现在是时候丢弃新引入的值了。出于可视化的目的，我们还可以重新排列结果的象限，使原点（0, 0）对应于图像中心。

C++:

 // crop the spectrum, if it has an odd number of rows or columns
 magI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2)); 
 // rearrange the quadrants of Fourier image so that the origin is at the image center
 int cx = magI.cols/2;
 int cy = magI.rows/2; 
 Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
 Mat q1(magI, Rect(cx, 0, cx, cy)); // Top-Right
 Mat q2(magI, Rect(0, cy, cx, cy)); // Bottom-Left
 Mat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right 
 Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
 q0.copyTo(tmp);
 q3.copyTo(q0);
 tmp.copyTo(q3); 
 q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
 q2.copyTo(q1);
 tmp.copyTo(q2);

Java:

 // crop the spectrum, if it has an odd number of rows or columns
 magI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));
  // rearrange the quadrants of Fourier image so that the origin is at the image center
 int cx = magI.cols()/2;
 int cy = magI.rows()/2; 
 Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
 Mat q1 = new Mat(magI, new Rect(cx, 0, cx, cy)); // Top-Right
 Mat q2 = new Mat(magI, new Rect(0, cy, cx, cy)); // Bottom-Left
 Mat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right 
 Mat tmp = new Mat(); // swap quadrants (Top-Left with Bottom-Right)
 q0.copyTo(tmp);
 q3.copyTo(q0);
 tmp.copyTo(q3); 
 q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
 q2.copyTo(q1);
 tmp.copyTo(q2);

Python:

 magI_rows, magI_cols = magI.shape
 # crop the spectrum, if it has an odd number of rows or columns
 magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
 # NOTE: indexing below is [row, column]; cx is half the row count and
 # cy is half the column count.
 cx = int(magI_rows/2)
 cy = int(magI_cols/2) 
 q0 = magI[0:cx, 0:cy] # Top-Left - Create a ROI per quadrant
 q1 = magI[cx:cx+cx, 0:cy] # Bottom-Left (rows cx..2*cx, columns 0..cy)
 q2 = magI[0:cx, cy:cy+cy] # Top-Right (rows 0..cx, columns cy..2*cy)
 q3 = magI[cx:cx+cx, cy:cy+cy] # Bottom-Right 
 tmp = np.copy(q0) # swap quadrants (Top-Left with Bottom-Right)
 magI[0:cx, 0:cy] = q3
 magI[cx:cx + cx, cy:cy + cy] = tmp 
 tmp = np.copy(q1) # swap quadrants (Bottom-Left with Top-Right)
 magI[cx:cx + cx, 0:cy] = q2
 magI[0:cx, cy:cy + cy] = tmp

归一化

这同样是出于可视化的目的。我们现在得到了幅度，但其取值仍然超出了图像的显示范围 0 到 1。我们使用 cv::normalize() 函数将这些值归一化到这个范围。

C++:

 normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a
 // viewable image form (float between values 0 and 1).

Java:

 Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1); // Min-max scale the matrix values
 // to the range 0..255 and store the result as CV_8UC1,
 // a form that can be shown with imshow.

python:

 cv.normalize(magI, magI, 0, 1, cv.NORM_MINMAX) # Transform the matrix with float values into a
 # viewable image form (float between values 0 and 1).

结果

一个应用思路是确定图像中存在的几何方向。例如，让我们判断文本是否是水平的。观察一段文本，你会注意到文本行构成了水平线条，而字母构成了垂直线条。在傅里叶变换的结果中，也可以看到文本的这两个主要成分。下面我们用一张水平文本的图像和一张旋转后的文本图像来说明。

如果是横向文本：

如果文本旋转：

您可以看到，频域中最有影响力的分量（幅度图像上最亮的点）遵循图像上物体的几何旋转。由此我们可以计算偏移量并执行图像旋转以校正最终的未对齐。

参考文献：

1、《Discrete Fourier Transform》-----Bernát Gábor