【Linux】自主WEB服务器实现

自主web服务器实现

1️⃣构建TcpServer
2️⃣构建HttpServer
3️⃣构建HttpRequest和HttpResponse
- Http请求报文格式
- Http响应报文
- 读取、处理请求&构建响应
- - 读取请求中的一行
  - 读取请求中需要注意的点
4️⃣CGI模式
- 判断是否需要用CGI处理请求
- 构建任务&线程池管理
5️⃣实验结果及总结
- 项目源码：
- 测试服务器各种情况
- 总结

在这里插入图片描述

1️⃣构建TcpServer

首先，基于TCP/IP协议创建监听套接字，用于获取来自客户端的连接：

创建监听套接字listen_sock
绑定bind监听套接字和相应端口号
监听listen来自客户端的连接

#pragma once

#include "log.hpp"

#include <iostream>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <cstdlib>
#include <unistd.h>

namespace ns_tcp
{
    const uint16_t g_port = 8080; // default listening port
    const int backlog = 5;        // backlog argument passed to listen()

    // Process exit codes used when setting up the listening socket fails.
    enum
    {
        SOCK_ERR = 2,
        BIND_ERR,
        LISTEN_ERR
    };

    /**
     * Singleton that owns the listening TCP socket.
     *
     * Obtain the instance with getInstance(), call InitTcpServer() once to
     * create, bind and listen, then hand Sock() to accept().
     */
    class TcpServer
    {
    private:
        uint16_t _port;        // port the socket is bound to
        int _listen_sock;      // listening descriptor; -1 until Socket() succeeds
        static TcpServer* svr; // the single instance
    private:
        // _listen_sock starts at -1 so the destructor never closes an
        // indeterminate descriptor when Socket() was not called.
        TcpServer(uint16_t port = g_port):_port(port), _listen_sock(-1)
        {}
        // A singleton must not be copied.
        TcpServer(const TcpServer&) = delete;
        TcpServer& operator=(const TcpServer&) = delete;

    public:
        /**
         * Returns the single instance, creating it with `port` on first use.
         * Later calls return the existing instance and ignore `port`.
         * Creation is serialised by a statically initialised mutex.
         */
        static TcpServer* getInstance(int port)
        {
            if(nullptr == svr)
            {
                static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
                pthread_mutex_lock(&lock);
                if(nullptr == svr)
                {
                    svr = new TcpServer(static_cast<uint16_t>(port));
                }
                pthread_mutex_unlock(&lock);
            }
            return svr;
        }

        /** Creates, binds and starts listening on the socket; exits the process on failure. */
        void InitTcpServer()
        {
            Socket();
            Bind();
            Listen();
            LOG(INFO, "init tcp success");
        }

        /** Creates the IPv4 stream socket; exits with SOCK_ERR on failure. */
        void Socket()
        {
            _listen_sock = socket(AF_INET, SOCK_STREAM, 0);
            if(_listen_sock < 0)
            {
                LOG(FATAL, "socket error.");
                exit(SOCK_ERR);
            }
            // Address reuse is best effort; a failure here is not fatal,
            // so the result is deliberately ignored.
            int opt = 1;
            (void)setsockopt(_listen_sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
            LOG(INFO, "socket success.");
        }

        /** Binds the socket to INADDR_ANY:_port; exits with BIND_ERR on failure. */
        void Bind()
        {
            // Value-initialise so sin_zero and any padding are zero before bind().
            struct sockaddr_in local{};
            local.sin_family = AF_INET;
            local.sin_port = htons(_port);
            local.sin_addr.s_addr = INADDR_ANY; // bind every local interface rather than one specific IP
            if(bind(_listen_sock, (sockaddr*)&local, sizeof(local)) < 0)
            {
                LOG(FATAL, "bind error.");
                exit(BIND_ERR);
            }
            LOG(INFO, "bind success.");
        }

        /** Puts the socket into listening state; exits with LISTEN_ERR on failure. */
        void Listen()
        {
            if(listen(_listen_sock, backlog) < 0)
            {
                LOG(FATAL, "listen error.");
                exit(LISTEN_ERR);
            }
            LOG(INFO, "listen success");
        }

        /** Returns the listening descriptor (-1 if Socket() has not run). */
        int Sock()
        {
            return _listen_sock;
        }

        ~TcpServer()
        {
            if(_listen_sock >= 0)
            {
                close(_listen_sock);
                _listen_sock = -1;
            }
        }
    };

    TcpServer* TcpServer::svr = nullptr;

}

2️⃣构建HttpServer

启动TcpServer监听客户端的连接请求；当有客户端连接到来时，由HttpServer调用accept获取对端套接字：

#pragma once

#include "TcpServer.hpp"
#include "Protocal.hpp"
#include "Task.hpp"
#include "ThreadPool.hpp"

#include <strings.h>
#include <signal.h>

using namespace ns_tcp;

namespace ns_http
{
    const uint16_t g_port = 8080;
    class HttpServer
    {
    private:
        int _port;
        bool _stop; // 服务器是否停止服务
    public:
        HttpServer(uint16_t port = g_port) : _port(port), _stop(false)
        {
        }
        void InitHttpServer()
        {
            //忽略SIGPIPE信号，如果不忽略，则会在写入失败时崩溃Server
            signal(SIGPIPE, SIG_IGN);
        }
        void Loop()
        {
            //std::cout << "http " << _port << std::endl;//_port = 0? 自己写的bug含泪也要找出来，传参传了argv[0]导致绑定的端口号有问题
            
            TcpServer* tsvr = TcpServer::getInstance(_port);
            tsvr->InitTcpServer();
            //std::cout << tsvr->Sock() << std::endl;
            while (!_stop)
            {
                // 获取对端链接
                sockaddr_in peer;
                socklen_t len = sizeof(peer);
                bzero(&peer, len);
                int sock = accept(tsvr->Sock(), (sockaddr *)&peer, &len);
                if (sock < 0) // accept error
                {
                    continue;
                }
                std::cout << "获取到新连接：" << sock << std::endl;

                Task t(sock);//创建任务
                ThreadPool::getInstance()->PushTask(t);//往线程池中添加任务


                //temporary cope
                /*pthread_t tid;
                int *_sock = new int(sock);
                pthread_create(&tid, nullptr, Entrance::HandlerRequest, _sock);
                pthread_detach(tid);*/
            }
        }
        ~HttpServer()
        {
        }
    };
}

3️⃣构建HttpRequest和HttpResponse

Http请求报文格式

在这里插入图片描述
由上图可知，Http请求报文分为四部分：

请求行request_line
请求报头request_header
空行blank
请求正文request_body

其中请求行分为请求方法method（GET or POST）、请求URI、Http协议及版本号
而请求报头是由一个个的K-V键值对构成

Http响应报文

在这里插入图片描述
Http响应报文与Http请求报文构成类似，亦是由四部分组成：

状态行status_line
响应报头response_header
空行blank
响应正文response_body

其中状态行由Http协议及版本号version、状态码status_code、状态码描述信息三部分构成
响应报头与请求报头一样，都是由键值对构成

由此构建出HttpRequest和HttpResponse类：

/**
 * A parsed HTTP request.
 *
 * Raw message parts, in wire order:
 *   1. request line
 *   2. request headers
 *   3. blank line
 *   4. request body
 * followed by the fields derived from them while parsing.
 */
class HttpRequest
{
    public:
        // ---- raw message ----
        std::string request_line;
        std::vector<std::string> request_header;
        std::string blank;
        std::string request_body;

        // ---- request line: method, uri, version ----
        std::string method;
        std::string uri; // path?query_string
        std::string version;

        // ---- uri split into its two halves ----
        std::string path;
        std::string query_string;

        std::string suffix; // suffix of the requested resource

        // ---- headers as key -> value ----
        std::unordered_map<std::string, std::string> headerkv;

        int content_length = 0;

        bool cgi = false;
    public:
        HttpRequest()
        {}
        ~HttpRequest() = default;
};

/**
 * An HTTP response under construction.
 *
 * Message parts, in wire order:
 *   1. status line
 *   2. response headers
 *   3. blank line
 *   4. response body
 */
class HttpResponse
{
    public:
        std::string status_line;
        std::vector<std::string> response_header;
        std::string blank = END_LINE;
        std::string response_body;

        int status_code = OK; // HTTP status code
        int fd = -1;          // descriptor of the file backing the page being served
        size_t size = 0;      // size of the requested resource
    public:
        HttpResponse()
        {}
        ~HttpResponse() = default;
};

读取、处理请求&构建响应

当一个Http请求到来时，首先我们需要把Http请求读取并解析出来：

读取请求中的一行

读取Http请求，需要化繁为简，先读取报文中的一行：
在Http协议中，一行的结束符一般有三种情况：
以"\n","\r"或者"\r\n"结束
因此，为了确保应对所有情况，我们在读取请求的时候将这三种情况统一转换为"\n"进行处理。
其次，对应HTTP请求报头的键值对，我们通过": "将其切分

class Util
{
    // Stateless helper routines.
    public:
    /**
     * Reads one line from `sock` and appends it to `out`.
     *
     * The terminators "\n", "\r\n" and a lone "\r" are all normalised to a
     * single '\n', which is appended as the last character of the line.
     *
     * Returns out.size() once a full line has been read, 0 if the peer
     * closed the connection first, and -1 on a read error.
     */
    static int ReadLine(int sock, std::string& out)
    {
        for(;;)
        {
            char cur = 'X';
            const ssize_t got = recv(sock, &cur, 1, 0);
            if(got == 0)
            {
                return 0;  // peer closed the connection
            }
            if(got < 0)
            {
                return -1; // read error
            }

            if(cur == '\r')
            {
                // Peek at the next byte without consuming it; only a '\n'
                // belongs to this terminator and gets removed from the socket.
                char next = 'X';
                recv(sock, &next, 1, MSG_PEEK);
                if(next == '\n')
                {
                    recv(sock, &next, 1, 0);
                }
                cur = '\n';
            }

            out += cur;
            if(cur == '\n')
            {
                break;
            }
        }
        return out.size();
    }

    /**
     * Splits `header` at the first occurrence of `sep`.
     *
     * On success stores the text before `sep` in `out_str1`, the text after
     * it in `out_str2`, and returns true. Returns false, leaving both
     * outputs untouched, when `sep` does not occur.
     */
    static bool SepString(const std::string& header, std::string& out_str1, std::string& out_str2, std::string sep)
    {
        const std::string::size_type at = header.find(sep);
        if(at == std::string::npos)
        {
            return false;
        }
        out_str1 = header.substr(0, at);
        out_str2 = header.substr(at + sep.size());
        return true;
    }
};

读取请求中需要注意的点

在读取请求的时候，为了美观，会通过resize将一行末尾的行结束符"\n"去除。但去除的前提是该行读取成功（至少包含一个字符）；若读取到的内容为空，size() - 1 会发生无符号下溢，resize会抛出如下运行错误：

std::length_error, what(): basic_string::resize

//读取请求 处理请求 构建响应
class EndPoint
{
    private:
        int _sock;//对端套接字
        HttpRequest _http_request;
        HttpResponse _http_response;
        bool stop;//用于服务器的启停
    private:
        /**
         * Reads the request line from the socket into _http_request.request_line
         * with the trailing '\n' removed. Sets `stop` when the read fails.
         * Returns the current value of `stop`.
         */
        bool RecvHttpRequestLine()
        {
            std::string& first_line = _http_request.request_line;
            if(Util::ReadLine(_sock, first_line) <= 0)
            {
                // Nothing usable was read: give up on this connection.
                stop = true;
                return stop;
            }
            // ReadLine succeeded, so the line ends with '\n'; strip it.
            first_line.resize(first_line.size() - 1);
            LOG(INFO, first_line);
            return stop;
        }
        /**
         * Reads header lines until the blank line that ends the header section.
         * Each header is stored without its trailing '\n'; the blank line is
         * stored in _http_request.blank. Sets `stop` when a read fails.
         * Returns the current value of `stop`.
         */
        bool RecvHttpRequestHeader()
        {
            for(std::string current; ; current.clear())
            {
                if(Util::ReadLine(_sock, current) <= 0)
                {
                    stop = true; // read failed
                    break;
                }
                if(current == "\n")
                {
                    // Blank line: end of headers. Must leave the loop here,
                    // otherwise the resize below would run on a 1-char line.
                    _http_request.blank = current;
                    LOG(INFO, current);
                    break;
                }
                current.resize(current.size() - 1); // drop the trailing '\n'
                _http_request.request_header.push_back(current);
                LOG(INFO, current);
            }
            return stop;
        }
        /**
         * Splits the request line on whitespace into method, uri and version,
         * then upper-cases the method (only the method) so later comparisons
         * with "GET"/"POST" are case-insensitive.
         */
        void ParseHttpRequestLine()
        {
            std::stringstream ss(_http_request.request_line);
            ss >> _http_request.method >> _http_request.uri >> _http_request.version;

            std::string& method = _http_request.method;
            // toupper() requires a value representable as unsigned char; passing
            // a negative plain char (e.g. a byte >= 0x80) is undefined behaviour.
            std::transform(method.begin(), method.end(), method.begin(),
                           [](unsigned char c) { return static_cast<char>(::toupper(c)); });
        }
        /**
         * Splits every raw header line at SEP into key and value and records
         * the pair in _http_request.headerkv. Lines that do not contain SEP
         * are skipped. When a key repeats, the first value is kept.
         */
        void ParseHttpRequestHeader()
        {
            for(const auto& header : _http_request.request_header)
            {
                std::string key;
                std::string value;
                if(!Util::SepString(header, key, value, SEP))
                {
                    // Malformed header: without this check the previous
                    // header's key/value would be inserted again.
                    continue;
                }
                _http_request.headerkv.insert({key, value});
            }
        }
        /**
         * Decides from the method and the Content-Length header whether a
         * request body has to be read.
         *
         * Returns true only for a POST request carrying a Content-Length
         * header; in that case _http_request.content_length is set from the
         * header, with negative values clamped to 0.
         */
        bool IsNeedRecvHttpRequestBody()
        {
            if(_http_request.method != "POST")
            {
                return false;
            }
            auto iter = _http_request.headerkv.find("Content-Length");
            if(iter == _http_request.headerkv.end())
            {
                return false;
            }
            int length = atoi(iter->second.c_str());
            if(length < 0)
            {
                // A negative length would make a counting-down read loop
                // run until the connection drops.
                length = 0;
            }
            _http_request.content_length = length;
            LOG(INFO, "recv http request body: " + std::to_string(_http_request.content_length));
            return true;
        }
        /**
         * Reads the request body, if one is expected, into
         * _http_request.request_body.
         *
         * Reads exactly content_length bytes. If the peer closes the
         * connection or recv fails first, `stop` is set so the caller does
         * not build a response from a truncated body.
         */
        void RecvHttpRequestBody()
        {
            if(!IsNeedRecvHttpRequestBody())
            {
                return;
            }

            std::string& body = _http_request.request_body;
            int remaining = _http_request.content_length;
            char buffer[4096];
            while(remaining > 0)
            {
                // Never ask for more than the body still owes us, so bytes
                // after the body are left in the socket.
                const size_t want = remaining < static_cast<int>(sizeof(buffer))
                                        ? static_cast<size_t>(remaining)
                                        : sizeof(buffer);
                const ssize_t got = recv(_sock, buffer, want, 0);
                if(got <= 0)
                {
                    stop = true; // read failed or peer closed early
                    break;
                }
                body.append(buffer, static_cast<size_t>(got));
                remaining -= static_cast<int>(got);
            }
            LOG(INFO, _http_request.request_body);
        }
        int ProcessNonCgi()
        {
            _http_response.fd = open(_http_request.path.c_str(), O_RDONLY);//打开要访问的网页所在的文件
            if(_http_response.fd >= 0)//打开文件成功
            {               
                return OK;
            }
                      
            return 404;
        }
        /**
         * Runs the requested executable as a CGI child process and captures
         * its output as the response body.
         *
         * The child gets METHOD plus either QUERY_STRING (GET) or
         * CONTENT_LENGTH (POST) in its environment, and its stdin/stdout are
         * redirected to two pipes. For POST the request body is written to
         * the child's stdin.
         *
         * Returns OK when the child exits with status 0, BAD_REQUEST when it
         * exits with a non-zero status, and SERVER_ERROR when it terminates
         * abnormally or when pipe()/fork() fails.
         */
        int ProcessCgi()
        {
            int code = OK;

            std::cout << "debug: " << "CGI MODEL" << std::endl;
            const std::string& bin = _http_request.path; // program the child will exec
            const std::string& method = _http_request.method;
            const std::string& query_string = _http_request.query_string;
            // Bind to the body itself: copying it through c_str() would
            // truncate a body that contains a '\0' byte.
            const std::string& body_text = _http_request.request_body;

            int input[2];  // child writes, parent reads
            int output[2]; // parent writes, child reads
            if(pipe(input) < 0)
            {
                LOG(ERROR, "pipe error");
                return SERVER_ERROR;
            }
            if(pipe(output) < 0)
            {
                LOG(ERROR, "pipe error");
                close(input[0]);
                close(input[1]);
                return SERVER_ERROR;
            }

            pid_t pid = fork();
            if(pid < 0)
            {
                LOG(ERROR, "fork error!");
                close(input[0]);
                close(input[1]);
                close(output[0]);
                close(output[1]);
                return SERVER_ERROR;
            }

            if(pid == 0)
            {
                // Child: writes to `input`, reads from `output`.
                close(input[0]);
                close(output[1]);

                // The strings handed to putenv() must stay alive until exec;
                // they live in this scope, which is only left via exec/_exit.
                std::string method_env = "METHOD=";
                method_env += method;
                putenv(const_cast<char*>(method_env.c_str()));

                std::string query_string_env;
                std::string content_length_env;
                if("GET" == method)
                {
                    // GET parameters are usually small: pass them in the environment.
                    query_string_env = "QUERY_STRING=";
                    query_string_env += query_string;
                    putenv(const_cast<char*>(query_string_env.c_str()));
                }
                else if("POST" == method)
                {
                    // The body arrives on stdin; tell the child how long it is.
                    content_length_env = "CONTENT_LENGTH=";
                    content_length_env += std::to_string(_http_request.content_length);
                    putenv(const_cast<char*>(content_length_env.c_str()));
                }

                // Redirect last so nothing printed above ends up in the pipe.
                // Open descriptors survive exec, so the new program simply
                // reads fd 0 and writes fd 1.
                dup2(output[0], 0);
                dup2(input[1], 1);

                execl(bin.c_str(), bin.c_str(), nullptr);
                // Only reached when exec failed. _exit avoids flushing stdio
                // buffers inherited from the parent into the response pipe.
                _exit(1);
            }

            // Parent: writes to `output`, reads from `input`.
            close(input[1]);
            close(output[0]);

            if("POST" == method)
            {
                const char* start = body_text.data();
                size_t total = 0;
                while(total < body_text.size())
                {
                    // Keep the result signed so a -1 error is not mistaken
                    // for a huge successful write.
                    const ssize_t written = write(output[1], start + total, body_text.size() - total);
                    if(written <= 0)
                    {
                        break;
                    }
                    total += static_cast<size_t>(written);
                }
            }
            // Close the write end now so a child that reads stdin until EOF
            // can finish; otherwise the read loop below could wait forever.
            close(output[1]);

            char buffer[4096];
            ssize_t got = 0;
            while((got = read(input[0], buffer, sizeof(buffer))) > 0)
            {
                _http_response.response_body.append(buffer, static_cast<size_t>(got));
            }
            close(input[0]);
            LOG(INFO, _http_response.response_body);

            int status = 0;
            pid_t ret = waitpid(pid, &status, 0);
            if(ret == pid)
            {
                if(WIFEXITED(status))
                {
                    // Exit status 0 means the CGI program handled the data.
                    code = (WEXITSTATUS(status) == 0) ? OK : BAD_REQUEST;
                }
                else
                {
                    code = SERVER_ERROR;
                }
            }
            return code;
        }
        /**
         * Appends the Content-Type and Content-Length headers of a successful
         * response. The status line has already been built by the caller.
         */
        void BuildOKResponse()
        {
            std::vector<std::string>& headers = _http_response.response_header;

            std::string content_type = "Content-Type: ";
            content_type += Suffix2Desc(_http_request.suffix);
            content_type += END_LINE;
            headers.push_back(content_type);

            // CGI: length of the captured body. Static file: size of the file.
            const std::string body_length = _http_request.cgi
                ? std::to_string(_http_response.response_body.size())
                : std::to_string(_http_response.size);

            std::string content_length = "Content-Length: ";
            content_length += body_length;
            content_length += END_LINE;
            headers.push_back(content_length);
        }
        /**
         * Prepares an error response that serves the static page `page`.
         *
         * Switches the response to non-CGI mode, opens the page, and appends
         * Content-Type / Content-Length headers. When the page cannot be
         * opened or inspected, Content-Length is 0.
         */
        void HandlerError(std::string page)
        {
            LOG(INFO, page);
            // Error pages are static files, so the response is sent the non-CGI way.
            _http_request.cgi = false;

            _http_response.fd = open(page.c_str(), O_RDONLY);
            std::cout << "debug HandlerError: " << _http_response.fd << std::endl;

            std::string line;
            line = "Content-Type: text/html";
            line += END_LINE;
            _http_response.response_header.push_back(line);

            // Only trust st.st_size when the page was opened and stat()
            // succeeded; otherwise `st` is uninitialised.
            struct stat st;
            size_t page_size = 0;
            if(_http_response.fd >= 0 && stat(page.c_str(), &st) == 0)
            {
                page_size = static_cast<size_t>(st.st_size);
            }
            // The sender needs the page size, otherwise the body would be empty.
            _http_response.size = page_size;

            line = "Content-Length: ";
            line += std::to_string(page_size);
            line += END_LINE;
            _http_response.response_header.push_back(line);
        }
        /**
         * Builds the status line from the current status code, then fills in
         * the headers that match it: the normal headers for OK, or the
         * matching static error page for NOT_FOUND, BAD_REQUEST and
         * SERVER_ERROR. Any other code gets a status line only.
         */
        void BuildHttpResponseHelper()
        {
            int& status = _http_response.status_code;

            // Status line: "<version> <code> <description><END_LINE>".
            std::string& first_line = _http_response.status_line;
            first_line = HTTP_VERSION;
            first_line += " ";
            first_line += std::to_string(status);
            first_line += " ";
            first_line += Code2Desc(status);
            first_line += END_LINE;

            // Error pages are looked up directly under the web root.
            std::string error_page = WEB_ROOT;
            error_page += "/";

            if(status == OK)
            {
                BuildOKResponse();
            }
            else if(status == NOT_FOUND)
            {
                error_page += PAGE_404;
                HandlerError(error_page);
            }
            else if(status == BAD_REQUEST)
            {
                error_page += PAGE_400;
                HandlerError(error_page);
            }
            else if(status == SERVER_ERROR)
            {
                error_page += PAGE_500;
                HandlerError(error_page);
            }
        }
    public:
        // Wraps the connected socket `sock`; the destructor closes it.
        // `stop` starts false, i.e. no receive error has occurred yet.
        EndPoint(int sock):_sock(sock), stop(false)
        {}
        // Returns the `stop` flag, which the receive routines set when
        // reading the request failed.
        bool Stop()
        {
            return stop;
        }
        /**
         * Receives and parses one HTTP request: request line, headers, then
         * the body if one is expected. Parsing only happens when both the
         * request line and the headers were read successfully.
         */
        void RecvHttpRequest()
        {
            if(RecvHttpRequestLine())
            {
                return; // request line could not be read
            }
            if(RecvHttpRequestHeader())
            {
                return; // headers could not be read
            }

            ParseHttpRequestLine();
            ParseHttpRequestHeader();

            RecvHttpRequestBody();
        }

        /**
         * Turns the parsed request into a status code and a response.
         *
         * 1. Only GET and POST are accepted; anything else is BAD_REQUEST.
         * 2. The uri is split into path and query string. A GET with a query
         *    string and every POST are handled in CGI mode.
         * 3. The path is resolved under WEB_ROOT; a directory maps to its
         *    HOME_PAGE. A path that does not exist yields NOT_FOUND, and a
         *    path containing ".." is rejected with BAD_REQUEST.
         * 4. An executable target is also handled in CGI mode; everything
         *    else is served as a static file.
         */
        void BuildHttpResponse()
        {
            // Declared up front: `goto END` must not jump over initialisations.
            std::string _path;
            struct stat st;
            size_t found = 0;

            if(_http_request.method != "GET" && _http_request.method != "POST")
            {
                _http_response.status_code = BAD_REQUEST;
                LOG(WARNING, "http request method is not right");
                goto END;
            }

            if(_http_request.method == "GET")
            {
                // "path?query_string": uploaded parameters mean CGI mode.
                if(Util::SepString(_http_request.uri, _http_request.path, _http_request.query_string, "?"))
                {
                    _http_request.cgi = true;
                }
                else
                {
                    _http_request.path = _http_request.uri;
                }
            }
            else
            {
                // POST always carries data, so it is handled in CGI mode.
                _http_request.cgi = true;
                _http_request.path = _http_request.uri;
            }

            // The path comes from the network; refuse anything that could
            // climb out of the web root.
            if(_http_request.path.find("..") != std::string::npos)
            {
                LOG(WARNING, "http request path contains \"..\"");
                _http_response.status_code = BAD_REQUEST;
                goto END;
            }

            // Resolve the path under the web root.
            _path = _http_request.path;
            _http_request.path = WEB_ROOT + _path;
            if(!_http_request.path.empty() && _http_request.path.back() == '/')
            {
                _http_request.path += HOME_PAGE;
            }

            if(stat(_http_request.path.c_str(), &st) != 0)
            {
                std::string info = _http_request.path;
                info += " NOT FOUND";
                LOG(WARNING, info);
                _http_response.status_code = NOT_FOUND;
                goto END;
            }

            if(S_ISDIR(st.st_mode))
            {
                // A directory named without a trailing '/': serve its home page.
                _http_request.path += "/";
                _http_request.path += HOME_PAGE;
                if(stat(_http_request.path.c_str(), &st) != 0)
                {
                    // Without this check `st` would still describe the
                    // directory, whose execute bits would select CGI mode.
                    std::string info = _http_request.path;
                    info += " NOT FOUND";
                    LOG(WARNING, info);
                    _http_response.status_code = NOT_FOUND;
                    goto END;
                }
            }

            // An executable target is run as a CGI program.
            if(st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
            {
                _http_request.cgi = true;
            }
            _http_response.size = st.st_size;

            // Resource type: text after the last '.', defaulting to ".html".
            found = _http_request.path.rfind('.');
            if(found == std::string::npos)
            {
                _http_request.suffix = ".html";
            }
            else
            {
                _http_request.suffix = _http_request.path.substr(found);
            }

            if(_http_request.cgi == true)
            {
                _http_response.status_code = ProcessCgi();
            }
            else
            {
                // Static file: open it here; headers are built below.
                _http_response.status_code = ProcessNonCgi();
            }
END:
            BuildHttpResponseHelper();
        }
        /**
         * Sends the response: status line, headers, blank line, then the body.
         *
         * In CGI mode the body is the captured response_body string. In
         * non-CGI mode the body is the file behind _http_response.fd, copied
         * with sendfile() without passing through a user-space buffer. The
         * file descriptor is closed afterwards.
         */
        void SendHttpResponse()
        {
            send(_sock, _http_response.status_line.c_str(), _http_response.status_line.size(), 0);
            LOG(INFO, _http_response.status_line.c_str());
            for(const auto& header : _http_response.response_header)
            {
                send(_sock, header.c_str(), header.size(), 0);
                LOG(INFO, header.c_str());
            }
            send(_sock, _http_response.blank.c_str(), _http_response.blank.size(), 0);
            LOG(INFO, _http_response.blank.c_str());

            if(_http_request.cgi)
            {
                const std::string& body = _http_response.response_body;
                const char* start = body.c_str();
                size_t total = 0;
                ssize_t size = 0;
                // send() may accept only part of the buffer; keep going until
                // everything is out or an error occurs.
                while(total < body.size() && (size = send(_sock, start + total, body.size() - total, 0)) > 0)
                {
                    total += size;
                    std::cout << "debug: total: " << total <<std::endl;
                }
            }
            else
            {
                // The file may not have been opened (e.g. missing error page).
                if(_http_response.fd >= 0)
                {
                    size_t remaining = _http_response.size;
                    while(remaining > 0)
                    {
                        // sendfile() may transfer fewer bytes than requested.
                        const ssize_t sent = sendfile(_sock, _http_response.fd, nullptr, remaining);
                        if(sent <= 0)
                        {
                            break;
                        }
                        remaining -= static_cast<size_t>(sent);
                    }
                    close(_http_response.fd);
                    _http_response.fd = -1;
                }
                LOG(INFO, std::to_string(_http_response.size));
            }
        }
        // Closes the client socket this EndPoint was constructed with.
        ~EndPoint()
        {
            close(_sock);
        }
};
/**
 * Callable that handles one client connection speaking HTTP:
 * receive the request, build the response, send it.
 */
class CallBack
{
public:
    CallBack()
    {}
    void operator()(int sock)
    {
        HandlerRequest(sock);
    }
    /**
     * Processes the HTTP request arriving on `sock`. The socket is closed
     * when the function returns.
     */
    static void HandlerRequest(int sock)
    {
        std::cout << "----------------begin----------------" << std::endl;

        // Automatic storage: the EndPoint destructor, which closes the
        // socket, runs on every path out of this function. A heap object
        // that is never deleted would leak both memory and the descriptor.
        EndPoint ep(sock);

        ep.RecvHttpRequest();
        if(!ep.Stop())
        {
            LOG(INFO, "Recv Success, Begin Build and Send");
            ep.BuildHttpResponse();
            ep.SendHttpResponse();
        }
        else
        {
            LOG(WARNING, "Recv Error, Stop Build and Send");
        }

        std::cout << "-----------------end-----------------" << std::endl;

        LOG(INFO, "Handler Request End");
    }
    ~CallBack()
    {}
};

4️⃣CGI模式

HTTP CGI机制
CGI(Common Gateway Interface) 是WWW技术中重要的技术之一，有着不可替代的重要地位。CGI是外部应用程序（CGI程序）与WEB服务器之间的接口标准，是在CGI程序和Web服务器之间传递信息的过程。
在这里插入图片描述
通过上面这张图可以很清晰的理解CGI模式。

判断是否需要用CGI处理请求

首先对应GET方法，若请求行的URI中通过?带有参数，则说明需要CGI处理，而POST请求由于有数据传来，因此也需要CGI处理。

GET /test_cgi?a=100&b=200 HTTP/1.1

CGI模式通过进程程序替换（exec）来执行CGI程序。由于HTTP服务器进程不能终止，因此应该先创建子进程，再在子进程中替换为CGI处理程序。至于父子进程间的通信，若为GET方法，传入的参数一般不大，通过环境变量传递即可；若为POST方法，所传参数可能较大，则采用管道进行进程间通信。
CGI处理请求：

        int ProcessNonCgi()
        {
            _http_response.fd = open(_http_request.path.c_str(), O_RDONLY);//打开要访问的网页所在的文件
            if(_http_response.fd >= 0)//打开文件成功
            {               
                return OK;
            }
                      
            return 404;
        }
        /**
         * Runs the requested executable as a CGI child process and captures
         * its output as the response body.
         *
         * The child gets METHOD plus either QUERY_STRING (GET) or
         * CONTENT_LENGTH (POST) in its environment, and its stdin/stdout are
         * redirected to two pipes. For POST the request body is written to
         * the child's stdin.
         *
         * Returns OK when the child exits with status 0, BAD_REQUEST when it
         * exits with a non-zero status, and SERVER_ERROR when it terminates
         * abnormally or when pipe()/fork() fails.
         */
        int ProcessCgi()
        {
            int code = OK;

            std::cout << "debug: " << "CGI MODEL" << std::endl;
            const std::string& bin = _http_request.path; // program the child will exec
            const std::string& method = _http_request.method;
            const std::string& query_string = _http_request.query_string;
            // Bind to the body itself: copying it through c_str() would
            // truncate a body that contains a '\0' byte.
            const std::string& body_text = _http_request.request_body;

            int input[2];  // child writes, parent reads
            int output[2]; // parent writes, child reads
            if(pipe(input) < 0)
            {
                LOG(ERROR, "pipe error");
                return SERVER_ERROR;
            }
            if(pipe(output) < 0)
            {
                LOG(ERROR, "pipe error");
                close(input[0]);
                close(input[1]);
                return SERVER_ERROR;
            }

            pid_t pid = fork();
            if(pid < 0)
            {
                LOG(ERROR, "fork error!");
                close(input[0]);
                close(input[1]);
                close(output[0]);
                close(output[1]);
                return SERVER_ERROR;
            }

            if(pid == 0)
            {
                // Child: writes to `input`, reads from `output`.
                close(input[0]);
                close(output[1]);

                // The strings handed to putenv() must stay alive until exec;
                // they live in this scope, which is only left via exec/_exit.
                std::string method_env = "METHOD=";
                method_env += method;
                putenv(const_cast<char*>(method_env.c_str()));

                std::string query_string_env;
                std::string content_length_env;
                if("GET" == method)
                {
                    // GET parameters are usually small: pass them in the environment.
                    query_string_env = "QUERY_STRING=";
                    query_string_env += query_string;
                    putenv(const_cast<char*>(query_string_env.c_str()));
                }
                else if("POST" == method)
                {
                    // The body arrives on stdin; tell the child how long it is.
                    content_length_env = "CONTENT_LENGTH=";
                    content_length_env += std::to_string(_http_request.content_length);
                    putenv(const_cast<char*>(content_length_env.c_str()));
                }

                // Redirect last so nothing printed above ends up in the pipe.
                // Open descriptors survive exec, so the new program simply
                // reads fd 0 and writes fd 1.
                dup2(output[0], 0);
                dup2(input[1], 1);

                execl(bin.c_str(), bin.c_str(), nullptr);
                // Only reached when exec failed. _exit avoids flushing stdio
                // buffers inherited from the parent into the response pipe.
                _exit(1);
            }

            // Parent: writes to `output`, reads from `input`.
            close(input[1]);
            close(output[0]);

            if("POST" == method)
            {
                const char* start = body_text.data();
                size_t total = 0;
                while(total < body_text.size())
                {
                    // Keep the result signed so a -1 error is not mistaken
                    // for a huge successful write.
                    const ssize_t written = write(output[1], start + total, body_text.size() - total);
                    if(written <= 0)
                    {
                        break;
                    }
                    total += static_cast<size_t>(written);
                }
            }
            // Close the write end now so a child that reads stdin until EOF
            // can finish; otherwise the read loop below could wait forever.
            close(output[1]);

            char buffer[4096];
            ssize_t got = 0;
            while((got = read(input[0], buffer, sizeof(buffer))) > 0)
            {
                _http_response.response_body.append(buffer, static_cast<size_t>(got));
            }
            close(input[0]);
            LOG(INFO, _http_response.response_body);

            int status = 0;
            pid_t ret = waitpid(pid, &status, 0);
            if(ret == pid)
            {
                if(WIFEXITED(status))
                {
                    // Exit status 0 means the CGI program handled the data.
                    code = (WEXITSTATUS(status) == 0) ? OK : BAD_REQUEST;
                }
                else
                {
                    code = SERVER_ERROR;
                }
            }
            return code;
        }

构建任务&线程池管理

对于一个个的Http请求，若需要采用CGI模式，可以通过任务回调的方法来实现HTTP服务器与CGI处理程序的解耦，并且我们可以采用线程池来处理任务，因为一般服务器接收的请求量会很大，若每个请求都要创建线程的话最终可能导致服务器崩溃，但是即便采用线程池也并不能非常好的解决这个问题，可以采用Epoll多路转接技术，不过本项目在于学习Http协议及CGI机制，因此此处处理采用线程池。

#pragma once

#include "Protocal.hpp"


/**
 * 管理任务对象，负责将相应任务通过回调进行处理
*/
/**
 * A unit of work for the thread pool: pairs a socket descriptor with the
 * callback object that services it.
 */
class Task{
private:
    int sock;//socket descriptor this task operates on (-1 when unset)
    CallBack handler;//callback invoked with the socket to process the task
public:
    /**
     * Builds a placeholder task. The socket is set to -1 so that a task that
     * was never assigned a real descriptor does not carry an indeterminate value.
     */
    Task():sock(-1)
    {}
    /**
     * Builds a task bound to the given socket descriptor.
     */
    Task(int _sock):sock(_sock)
    {}
    /**
     * Runs the task by invoking the handler with the stored socket.
     */
    void ProcessOn()
    {
        handler(sock);
    }
    ~Task()
    {}
};

#pragma once

#include <queue>
#include <pthread.h>

#include "Task.hpp"
#include "log.hpp"

#define NUM 6

class ThreadPool
{
private:
    int num;//线程池中线程的数量
    std::queue<Task> task_queue;//任务队列，线程从该队列中获取任务
    pthread_mutex_t lock;//处理临界区的锁
    pthread_cond_t cond;//条件变量
    bool stop;//判断线程池是否停止工作
    ThreadPool(int _num = NUM):num(_num), stop(false)
    {
        pthread_mutex_init(&lock, nullptr);
        pthread_cond_init(&cond, nullptr);
    }
    ThreadPool(const ThreadPool &)
    {}

    static ThreadPool* single_instance;
public:
    static ThreadPool* getInstance()
    {
        static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;//静态锁
        if(nullptr == single_instance)
        {
            pthread_mutex_lock(&mtx);
            if(nullptr == single_instance)
            {
                single_instance = new ThreadPool();
                single_instance->InitThreadPool();
            }
            pthread_mutex_unlock(&mtx);
        }
        return single_instance;
    }
    void Lock()
    {
        pthread_mutex_lock(&lock);
    }
    void UnLock()
    {
        pthread_mutex_unlock(&lock);
    }
    bool IsStop()
    {
        return stop;
    }
    bool IsTaskQueueEmpty()
    {
        return task_queue.size() == 0 ? true : false;
    }
    static void* ThreadRoutine(void* args)//线程例程
    {
        ThreadPool* tp = (ThreadPool*)args;
        while(!tp->IsStop())
        {
            Task t;
            tp->Lock();//加锁
            while(tp->IsTaskQueueEmpty())
            {
                //任务队列为空,线程休眠
                tp->Wait();//当线程唤醒之后，此时一定是带有锁的
            }
            //获取任务
            tp->PopTask(t);
            tp->UnLock();//解锁
            t.ProcessOn();//调用任务回调函数处理任务
        }
    }
    bool InitThreadPool()
    {
        for(int i = 0; i < num; i++)
        {
            pthread_t pid;
            if(pthread_create(&pid, nullptr, ThreadRoutine, this) != 0)
            {
                LOG(FATAL, "create threadpool error!");
                return false;
            }
        }
        LOG(INFO, "create threadpool success");
        return true;
    }
    /**
     * 若暂时无任务处理，则让线程休眠
    */
    void Wait()
    {
        pthread_cond_wait(&cond, &lock);
    }
    /**
     * 当有任务到来时，唤醒休眠的线程
    */
    void WakeUp()
    {
        pthread_cond_signal(&cond);
    }
    /**
     * 服务器调用PushTask往线程池的任务队列中添加任务
    */
    void PushTask(const  Task& t)
    {
        Lock();
        task_queue.push(t);
        UnLock();
        WakeUp();//任务到来，唤醒任务
    }
    /**
     * 线程从任务队列中取任务
    */
    void PopTask(Task& t)
    {
        t = task_queue.front();
        task_queue.pop();
    }
    ~ThreadPool()
    {
        pthread_mutex_destroy(&lock);
        pthread_cond_destroy(&cond);
    }
};

ThreadPool* ThreadPool::single_instance = nullptr;

5️⃣实验结果及总结

项目源码：

自主web服务器项目源码

测试服务器各种情况

启动服务器后，访问页面：
GET方法提交数据及构建响应：
在这里插入图片描述

POST方法提交数据
在这里插入图片描述
点击提交数据后构建响应：

差错处理：
服务器处理数据异常时：

访问资源不存在时：
在这里插入图片描述

总结

http协议被广泛使用，从移动端，pc端浏览器，http协议无疑是打开互联网应用窗口的重要协议，http在网络应用层中的地位不可撼动，是能准确区分前后台的重要协议。
本次项目学习了http协议的理论知识，认识、理解并运用了CGI模式处理请求。在完成项目的过程中也遇到过许许多多大大小小的bug，通过不断的调试最终获得了较为不错的结果。该项目亦有许多不足值得完善，比如线程池的改良、接入MYSQL数据库进行数据管理，以及实现HTTP/1.1长连接功能等等，仍然有许多值得学习的地方。