diff --git "a/CUDA\350\252\262\351\241\214/streamfft.cu" "b/CUDA\350\252\262\351\241\214/streamfft.cu"
new file mode 100644
index 0000000000000000000000000000000000000000..1057c84898f0bf273ea83df4ea3f0e566ce862f8
--- /dev/null
+++ "b/CUDA\350\252\262\351\241\214/streamfft.cu"
@@ -0,0 +1,113 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <cuda_runtime.h>
+#include <cufft.h>
+#define NX 512
+#define NY 512
+
+// Abort with a diagnostic if a CUDA runtime call does not return cudaSuccess.
+#define CUDA_CHECK(call)                                                      \
+    do {                                                                      \
+        cudaError_t err_ = (call);                                            \
+        if (err_ != cudaSuccess) {                                            \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
+                    cudaGetErrorString(err_));                                \
+            exit(EXIT_FAILURE);                                               \
+        }                                                                     \
+    } while (0)
+
+// Abort with a diagnostic if a cuFFT call does not return CUFFT_SUCCESS.
+#define CUFFT_CHECK(call)                                                     \
+    do {                                                                      \
+        cufftResult res_ = (call);                                            \
+        if (res_ != CUFFT_SUCCESS) {                                          \
+            fprintf(stderr, "cuFFT error %s:%d: code %d\n", __FILE__,         \
+                    __LINE__, (int)res_);                                     \
+            exit(EXIT_FAILURE);                                               \
+        }                                                                     \
+    } while (0)
+
+// Single-precision complex value stored as two consecutive floats
+// (real, imag); the cufftComplex casts in main() rely on this layout.
+struct Complex {
+    float real;
+    float imag;
+};
+
+// Builds an NX x NY complex test signal on the host, runs a forward 2D
+// complex-to-complex FFT on the GPU inside a single CUDA stream, and prints
+// every input/output pair to stdout.
+//
+// Returns 0 on success; any CUDA or cuFFT failure prints a diagnostic to
+// stderr and terminates the process with EXIT_FAILURE.
+int main() {
+    const int size = NX * NY;
+    const size_t complexSize = (size_t)size * sizeof(Complex);
+
+    // Host buffers are page-locked heap allocations: two 2 MiB arrays do not
+    // belong on the stack, and pinned memory lets cudaMemcpyAsync run
+    // asynchronously with respect to the host.
+    Complex *hostInput = NULL;
+    Complex *hostOutput = NULL;
+    CUDA_CHECK(cudaMallocHost((void**)&hostInput, complexSize));
+    CUDA_CHECK(cudaMallocHost((void**)&hostOutput, complexSize));
+
+    // Row-major layout: element (i, j) lives at index i * NY + j.
+    for (int i = 0; i < NX; i++) {
+        for (int j = 0; j < NY; j++) {
+            const int idx = i * NY + j;
+            hostInput[idx].real = static_cast<float>(sin(i + j));
+            hostInput[idx].imag = static_cast<float>(cos(i + j));
+        }
+    }
+
+    // Allocate device memory for the FFT input and output.
+    Complex *deviceInput = NULL;
+    Complex *deviceOutput = NULL;
+    CUDA_CHECK(cudaMalloc((void**)&deviceInput, complexSize));
+    CUDA_CHECK(cudaMalloc((void**)&deviceOutput, complexSize));
+
+    // Create the stream that orders the copy -> FFT -> copy pipeline.
+    cudaStream_t stream;
+    CUDA_CHECK(cudaStreamCreate(&stream));
+
+    // Create the 2D FFT plan and bind it to the stream; without
+    // cufftSetStream the transform would be issued on the default stream.
+    cufftHandle plan;
+    CUFFT_CHECK(cufftPlan2d(&plan, NX, NY, CUFFT_C2C));
+    CUFFT_CHECK(cufftSetStream(plan, stream));
+
+    // Upload the input.
+    CUDA_CHECK(cudaMemcpyAsync(deviceInput, hostInput, complexSize,
+                               cudaMemcpyHostToDevice, stream));
+
+    // Execute the forward FFT.
+    CUFFT_CHECK(cufftExecC2C(plan, (cufftComplex*)deviceInput,
+                             (cufftComplex*)deviceOutput, CUFFT_FORWARD));
+
+    // Download the result.
+    CUDA_CHECK(cudaMemcpyAsync(hostOutput, deviceOutput, complexSize,
+                               cudaMemcpyDeviceToHost, stream));
+
+    // The copies and the FFT are only enqueued on the stream; wait for it to
+    // drain before the host reads hostOutput.
+    CUDA_CHECK(cudaStreamSynchronize(stream));
+
+    // Print the results.
+    for (int i = 0; i < NX; i++) {
+        for (int j = 0; j < NY; j++) {
+            const int idx = i * NY + j;
+            printf("Input: %f + %fI, Output: %f + %fI\n", hostInput[idx].real, hostInput[idx].imag, hostOutput[idx].real, hostOutput[idx].imag);
+        }
+    }
+
+    CUFFT_CHECK(cufftDestroy(plan));
+    CUDA_CHECK(cudaStreamDestroy(stream));
+    CUDA_CHECK(cudaFree(deviceInput));
+    CUDA_CHECK(cudaFree(deviceOutput));
+    CUDA_CHECK(cudaFreeHost(hostInput));
+    CUDA_CHECK(cudaFreeHost(hostOutput));
+
+    return 0;
+}
\ No newline at end of file