#include "stdio.h"
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>

// Defining two constants.
// Both live in constant memory; the host writes them with
// cudaMemcpyToSymbol before the kernel that reads them is launched.
__constant__ int constant_f;
__constant__ int constant_g;

// Number of array elements; main also uses it as the thread count per block.
#define N 5

// Kernel function for using constant memory
/**
 * Computes d_out[tid] = constant_f * d_in[tid] + constant_g, one element
 * per thread, reading both coefficients from constant memory.
 *
 * The element index is threadIdx.x only, so the kernel expects a single
 * one-dimensional block. There is no bounds check: the block size must not
 * exceed the number of elements in d_in and d_out.
 *
 * @param d_in  device array that is read
 * @param d_out device array that receives the results
 */
__global__ void gpu_constant_memory(float *d_in, float *d_out) {
    // Thread index for current kernel
    int tid = threadIdx.x;
    d_out[tid] = constant_f * d_in[tid] + constant_g;
}
常量内存中的变量使用 __constant__ 关键字修饰
使用 cudaMemcpyToSymbol 函数把这些常量复制到内核执行所需要的常量内存中
常量内存应合理使用,不然会增加程序执行时间
主函数调用如下:
// Prints a diagnostic to stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/**
 * Host driver for the constant-memory example.
 *
 * Fills h_in with 0..N-1, uploads it together with the two coefficients
 * (h_f -> constant_f, h_g -> constant_g), launches gpu_constant_memory with
 * one block of N threads, copies the result back and prints it.
 *
 * @return 0 on success, 1 if any CUDA call failed.
 */
int main(void) {
    // Defining Arrays for host
    float h_in[N], h_out[N];
    // Defining Pointers for device; kept NULL until allocated so that the
    // cleanup at the end can call cudaFree unconditionally.
    float *d_in = NULL, *d_out = NULL;
    int h_f = 2;
    int h_g = 20;
    const size_t bytes = N * sizeof(float);

    // Initializing Array
    for (int i = 0; i < N; i++) {
        h_in[i] = i;
    }

    // Allocate the memory on the device (cudaMalloc returns GPU memory),
    // copy the input array, then copy the constants to constant memory.
    // The && chain stops at the first failing call.
    bool ok =
        cuda_ok(cudaMalloc((void **)&d_in, bytes), "cudaMalloc(d_in)") &&
        cuda_ok(cudaMalloc((void **)&d_out, bytes), "cudaMalloc(d_out)") &&
        cuda_ok(cudaMemcpy(d_in, h_in, bytes, cudaMemcpyHostToDevice),
                "cudaMemcpy(d_in)") &&
        cuda_ok(cudaMemcpyToSymbol(constant_f, &h_f, sizeof(int), 0,
                                   cudaMemcpyHostToDevice),
                "cudaMemcpyToSymbol(constant_f)") &&
        cuda_ok(cudaMemcpyToSymbol(constant_g, &h_g, sizeof(int)),
                "cudaMemcpyToSymbol(constant_g)");

    if (ok) {
        // Calling kernel with one block and N threads per block
        gpu_constant_memory<<<1, N>>>(d_in, d_out);
        // A launch does not return a status itself; query it explicitly.
        // The blocking cudaMemcpy that follows reports execution errors.
        ok = cuda_ok(cudaGetLastError(), "gpu_constant_memory launch") &&
             // Copying result back to host from device memory
             cuda_ok(cudaMemcpy(h_out, d_out, bytes, cudaMemcpyDeviceToHost),
                     "cudaMemcpy(h_out)");
    }

    if (ok) {
        // Printing result on console
        printf("Use of Constant memory on GPU \n");
        for (int i = 0; i < N; i++) {
            printf("The expression for input %f is %f\n", h_in[i], h_out[i]);
        }
    }

    // Free up memory (cudaFree on a null pointer performs no operation)
    cudaFree(d_in);
    cudaFree(d_out);
    return ok ? 0 : 1;
}
#include "stdio.h"
#include <stdlib.h>  // malloc / free used by main
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>

#define NUM_THREADS 10
#define N 10

// Texture memory definition: a one-dimensional texture reference of float
// elements, read back without type conversion (cudaReadModeElementType).
// NOTE: texture references are a legacy API (deprecated in CUDA 11 and
// removed in CUDA 12); building this needs a toolkit that still ships them.
texture<float, 1, cudaReadModeElementType> textureRef;
/**
 * Reads texel idx of textureRef with tex1D and stores it in d_out[idx],
 * where idx is the flat global thread index.
 *
 * Expects a one-dimensional grid of one-dimensional blocks. Threads whose
 * index is >= n do nothing, so the grid may be larger than n.
 *
 * @param n     number of elements to copy
 * @param d_out device array that receives at least n floats
 */
__global__ void gpu_texture_memory(int n, float *d_out) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        float temp = tex1D(textureRef, float(idx));
        d_out[idx] = temp;
    }
}

// Prints a diagnostic to stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/**
 * Host driver for the texture-memory example.
 *
 * Fills h_in with 0..N-1, copies it into a CUDA array, binds textureRef to
 * that array, launches gpu_texture_memory to read every element back through
 * the texture, and prints the result.
 *
 * @return 0 on success, 1 if an allocation or any CUDA call failed.
 */
int main() {
    // Calculate number of blocks to launch (N / NUM_THREADS, rounded up)
    int num_blocks = N / NUM_THREADS + ((N % NUM_THREADS) ? 1 : 0);

    // Every resource starts out empty so the cleanup section below can run
    // no matter where a failure happened.
    float *d_out = NULL;          // device buffer for the result
    float *h_out = NULL;          // host buffer for the result
    cudaArray *cu_Array = NULL;   // CUDA array backing the texture
    bool bound = false;           // true once textureRef is bound

    // Declare and initialize host array
    float h_in[N];
    for (int i = 0; i < N; i++) {
        h_in[i] = float(i);
    }

    // allocate space on the host for the results
    h_out = (float *)malloc(sizeof(float) * N);
    bool ok = (h_out != NULL);
    if (!ok) {
        fprintf(stderr, "malloc failed for h_out\n");
    }

    ok = ok &&
         // allocate space on the device for the result
         cuda_ok(cudaMalloc((void **)&d_out, sizeof(float) * N),
                 "cudaMalloc(d_out)") &&
         // Define CUDA Array
         cuda_ok(cudaMallocArray(&cu_Array, &textureRef.channelDesc, N, 1),
                 "cudaMallocArray") &&
         // Copy data to CUDA Array; (0, 0) means start at the upper-left corner
         cuda_ok(cudaMemcpyToArray(cu_Array, 0, 0, h_in, sizeof(float) * N,
                                   cudaMemcpyHostToDevice),
                 "cudaMemcpyToArray");

    if (ok) {
        // bind a texture to the CUDA array
        bound = cuda_ok(cudaBindTextureToArray(textureRef, cu_Array),
                        "cudaBindTextureToArray");
        ok = bound;
    }

    if (ok) {
        // Call Kernel
        gpu_texture_memory<<<num_blocks, NUM_THREADS>>>(N, d_out);
        // A launch does not return a status itself; query it explicitly.
        // The blocking cudaMemcpy that follows reports execution errors.
        ok = cuda_ok(cudaGetLastError(), "gpu_texture_memory launch") &&
             // copy result back to host
             cuda_ok(cudaMemcpy(h_out, d_out, sizeof(float) * N,
                                cudaMemcpyDeviceToHost),
                     "cudaMemcpy(h_out)");
    }

    if (ok) {
        printf("Use of Texture memory on GPU: \n");
        for (int i = 0; i < N; i++) {
            printf("Texture element at %d is : %f\n", i, h_out[i]);
        }
    }

    // Release resources. The texture is unbound before the array behind it
    // is freed, so the reference never points at released memory.
    if (bound) {
        cudaUnbindTexture(textureRef);
    }
    if (cu_Array != NULL) {
        cudaFreeArray(cu_Array);
    }
    cudaFree(d_out);
    free(h_out);
    return ok ? 0 : 1;
}
数据库DCL语句
介绍:
DCL英文全称是Data Control Language(数据控制语言),用来管理数据库用户、控制数据库的访问权限。
管理用户:
查询用户:
select * from mysql.user;
创建用户:
create user '用户名'@'主机名' identified by '密码';
修改用…
基于开源ATmega8 无感BLDC程序移植到ATmega328PB 🔖基于Atmel Studio 7.0开发环境。🥕开源原项目资源地址:https://svn.mikrokopter.de/websvn/listing.php?repname=BL-Ctrl&path=%2F& 📍原理图和PCB资源 BL-Ctrl v2.0 in E…
得到一个T4.pyc
回编译一下
得到下面代码 import base64def encrypt_and_compare(user_input, offset_str, target_base64):if len(user_input) ! 24:return Please enter a string with a length of 24encrypted Nonefor i, char in enumerate(user_input):offset int(off…