在evhttp_connection_cb这个函数中,会调用getsockopt(SO_ERROR)来检查socket套接字的连接是否出现异常。但是用getsockopt读取SO_ERROR有一个连续调用的问题:错误被读取一次之后,fd上挂起的错误就被清除了,第二次调用时就读不到了。这样一来,本来这个socket套接字是有问题的,却检测不出来。目前evhttp_connection_cb的bug就在这里:如果后端服务器根本就没有开启,该函数中的getsockopt也检测不到异常,导致其误以为已经连接上了,流程会一直往下走,继续去读取后端服务器的body,却发现读不到数据,最终返回503错误;而原本应该是getsockopt检测到连接异常后,就响应500错误的。
81/*
* Event callback for asynchronous connection attempt.
*/
static void
evhttp_connection_cb(struct bufferevent *bufev, short what, void *arg)
{
struct evhttp_connection *evcon = arg;
int error;
ev_socklen_t errsz = sizeof(error);
if (evcon->fd == -1)
evcon->fd = bufferevent_getfd(bufev);
if (!(what & BEV_EVENT_CONNECTED)) {
/* some operating systems return ECONNREFUSED immediately
* when connecting to a local address. the cleanup is going
* to reschedule this function call.
*/
if (errno == ECONNREFUSED)
goto cleanup;
evhttp_error_cb(bufev, what, arg);
return;
}
if (evcon->fd == -1) {
event_debug(("%s: bufferevent_getfd returned -1",
__func__));
goto cleanup;
}
/* Check if the connection completed */
if (getsockopt(evcon->fd, SOL_SOCKET, SO_ERROR, (void*)&error,
&errsz) == -1) {
event_debug(("%s: getsockopt for \"%s:%d\" on "EV_SOCK_FMT,
__func__, evcon->address, evcon->port,
EV_SOCK_ARG(evcon->fd)));
goto cleanup;
}
if (error) {
event_debug(("%s: connect failed for \"%s:%d\" on "
EV_SOCK_FMT": %s",
__func__, evcon->address, evcon->port,
EV_SOCK_ARG(evcon->fd),
evutil_socket_error_to_string(error)));
goto cleanup;
}
/* We are connected to the server now */
event_debug(("%s: connected to \"%s:%d\" on "EV_SOCK_FMT"\n",
__func__, evcon->address, evcon->port,
EV_SOCK_ARG(evcon->fd)));
/* Reset the retry count as we were successful in connecting */
evcon->retry_cnt = 0;
evcon->state = EVCON_IDLE;
/* reset the bufferevent cbs */
bufferevent_setcb(evcon->bufev,
evhttp_read_cb,
evhttp_write_cb,
evhttp_error_cb,
evcon);
if (!evutil_timerisset(&evcon->timeout)) {
const struct timeval read_tv = { HTTP_READ_TIMEOUT, 0 };
const struct timeval write_tv = { HTTP_WRITE_TIMEOUT, 0 };
bufferevent_set_timeouts(evcon->bufev, &read_tv, &write_tv);
} else {
bufferevent_set_timeouts(evcon->bufev, &evcon->timeout, &evcon->timeout);
}
/* try to start requests that have queued up on this connection */
evhttp_request_dispatch(evcon);
return;
cleanup:
evhttp_connection_cb_cleanup(evcon);
}
解决方案
务必对what进行判断,有BEV_EVENT_ERROR标志时代表连接失败。
回调函数分析
具体是哪里已经调用了getsockopt函数呢?这里需要追踪一下bufferevent的回调函数机制。
77int
evhttp_connection_connect_(struct evhttp_connection *evcon)
{
int old_state = evcon->state;
const char *address = evcon->address;
const struct sockaddr *sa = evhttp_connection_get_addr(evcon);
int ret;
if (evcon->state == EVCON_CONNECTING)
return (0);
evhttp_connection_reset_(evcon);
EVUTIL_ASSERT(!(evcon->flags & EVHTTP_CON_INCOMING));
evcon->flags |= EVHTTP_CON_OUTGOING;
if (evcon->bind_address || evcon->bind_port) {
evcon->fd = bind_socket(
evcon->bind_address, evcon->bind_port, 0 /*reuse*/);
if (evcon->fd == -1) {
event_debug(("%s: failed to bind to \"%s\"",
__func__, evcon->bind_address));
return (-1);
}
if (bufferevent_setfd(evcon->bufev, evcon->fd))
return (-1);
} else {
if (bufferevent_setfd(evcon->bufev, -1))
return (-1);
}
/* Set up a callback for successful connection setup */
bufferevent_setcb(evcon->bufev,
NULL /* evhttp_read_cb */,
NULL /* evhttp_write_cb */,
evhttp_connection_cb,
evcon);
if (!evutil_timerisset(&evcon->timeout)) {
const struct timeval conn_tv = { HTTP_CONNECT_TIMEOUT, 0 };
bufferevent_set_timeouts(evcon->bufev, &conn_tv, &conn_tv);
} else {
bufferevent_set_timeouts(evcon->bufev, &evcon->timeout, &evcon->timeout);
}
/* make sure that we get a write callback */
if (bufferevent_enable(evcon->bufev, EV_WRITE))
return (-1);
evcon->state = EVCON_CONNECTING;
if (evcon->flags & EVHTTP_CON_REUSE_CONNECTED_ADDR &&
sa &&
(sa->sa_family == AF_INET || sa->sa_family == AF_INET6)) {
int socklen = sizeof(struct sockaddr_in);
if (sa->sa_family == AF_INET6) {
socklen = sizeof(struct sockaddr_in6);
}
ret = bufferevent_socket_connect(evcon->bufev, sa, socklen);
} else {
ret = bufferevent_socket_connect_hostname(evcon->bufev,
evcon->dns_base, evcon->ai_family, address, evcon->port);
}
if (ret < 0) {
evcon->state = old_state;
event_sock_warn(evcon->fd, "%s: connection to \"%s\" failed",
__func__, evcon->address);
/* some operating systems return ECONNREFUSED immediately
* when connecting to a local address. the cleanup is going
* to reschedule this function call.
*/
evhttp_connection_cb_cleanup(evcon);
return (0);
}
return (0);
}
在这个连接函数中,libevent作为客户端,需要建立socket套接字,(在设置了bind_address或bind_port时)进行地址绑定,然后connect服务器。这个socket会被设置为非阻塞(non-blocking)的,不会傻傻地等待连接完成,而是后续再对是否连接成功进行判断。查看bufferevent_socket_connect函数源码可知,发起连接后会设置bufev_p->connecting = 1;。
另外,在这个连接函数中,会设置好用户连接回调函数,bufferevent_setcb设置为evhttp_connection_cb,这个函数在第一节说过了,那什么时候会调用这个回调函数呢?
这个用户回调函数在bufferevent_run_eventcb_中会调用,而bufferevent_run_eventcb_又会在bufferevent_writecb中被调用,什么情况下调用呢?
bufferevent_writecb中会调用evutil_socket_finished_connecting_函数检测连接状态。注意,evutil_socket_finished_connecting_函数正是通过getsockopt来检测连接状态的:当检测到连接失败时,就会调用bufferevent_run_eventcb_,进而调用我们通过bufferevent_setcb注册的连接回调函数,并把BEV_EVENT_ERROR标志通过what参数传递过去。由于socket上的错误已经在这里被getsockopt读取并清除,所以在我们自己的回调函数evhttp_connection_cb里再调用getsockopt是取不到错误码的,只能通过what参数是否带有BEV_EVENT_ERROR标志来判断连接是否异常。
最后,可能你还想问,bufferevent_writecb又是什么呢?什么情况下调用它?
事实上,我们在evhttp_connection_connect_函数中调用bufferevent_setfd时,就会设置好libevent的事件:当fd上有写事件到来时,就会进入bufferevent_writecb函数处理,如下面的be_socket_setfd代码所示,这下终于整明白了。
26static void
be_socket_setfd(struct bufferevent *bufev, evutil_socket_t fd)
{
struct bufferevent_private *bufev_p = BEV_UPCAST(bufev);
BEV_LOCK(bufev);
EVUTIL_ASSERT(BEV_IS_SOCKET(bufev));
event_del(&bufev->ev_read);
event_del(&bufev->ev_write);
evbuffer_unfreeze(bufev->input, 0);
evbuffer_unfreeze(bufev->output, 1);
event_assign(&bufev->ev_read, bufev->ev_base, fd,
EV_READ|EV_PERSIST|EV_FINALIZE, bufferevent_readcb, bufev);
event_assign(&bufev->ev_write, bufev->ev_base, fd,
EV_WRITE|EV_PERSIST|EV_FINALIZE, bufferevent_writecb, bufev);
if (fd >= 0)
bufferevent_enable(bufev, bufev->enabled);
evutil_getaddrinfo_cancel_async_(bufev_p->dns_request);
BEV_UNLOCK(bufev);
}
目前,http.c文件中evhttp_connection_cb函数还是会有getsockopt函数的调用,虽然前面有对what进行检测,但是总感觉getsockopt已经没有作用了,为什么要这么写?