问题现象
本地服务启动,发现调用FeignClient的服务,跑的是sit的服务,而本地是uat的环境配置。
问题跟踪
feign.SynchronousMethodHandler#invoke
,调用远程服务。
public Object invoke(Object[] argv) throws Throwable {
RequestTemplate template = buildTemplateFromArgs.create(argv);
Options options = findOptions(argv);
Retryer retryer = this.retryer.clone();
while (true) {
try {
return executeAndDecode(template, options);
} catch (RetryableException e) {
try {
retryer.continueOrPropagate(e);
} catch (RetryableException th) {
Throwable cause = th.getCause();
if (propagationPolicy == UNWRAP && cause != null) {
throw cause;
} else {
throw th;
}
}
if (logLevel != Logger.Level.NONE) {
logger.logRetry(metadata.configKey(), logLevel);
}
continue;
}
}
}
Object executeAndDecode(RequestTemplate template, Options options) throws Throwable {
Request request = targetRequest(template);
if (logLevel != Logger.Level.NONE) {
logger.logRequest(metadata.configKey(), logLevel, request);
}
Response response;
long start = System.nanoTime();
try {
response = client.execute(request, options);
// ensure the request is set. TODO: remove in Feign 12
response = response.toBuilder()
.request(request)
.requestTemplate(template)
.build();
} catch (IOException e) {
if (logLevel != Logger.Level.NONE) {
logger.logIOException(metadata.configKey(), logLevel, e, elapsedTime(start));
}
throw errorExecuting(request, e);
}
long elapsedTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
if (decoder != null)
return decoder.decode(response, metadata.returnType());
CompletableFuture<Object> resultFuture = new CompletableFuture<>();
asyncResponseHandler.handleResponse(resultFuture, metadata.configKey(), response,
metadata.returnType(),
elapsedTime);
try {
if (!resultFuture.isDone())
throw new IllegalStateException("Response handling not done");
return resultFuture.join();
} catch (CompletionException e) {
Throwable cause = e.getCause();
if (cause != null)
throw cause;
throw e;
}
}
RetryableFeignBlockingLoadBalancerClient#execute
,核心方法是retrievedServiceInstance = loadBalancerClient.choose(serviceId, lbRequest);
,通过负载均衡的客户端获取对应serviceId的服务实例,该实例实体信息中有具体服务的IP地址,观察到是sit环境。
@Override
public Response execute(Request request, Request.Options options) throws IOException {
final URI originalUri = URI.create(request.url());
String serviceId = originalUri.getHost();
Assert.state(serviceId != null, "Request URI does not contain a valid hostname: " + originalUri);
final LoadBalancedRetryPolicy retryPolicy = loadBalancedRetryFactory.createRetryPolicy(serviceId,
loadBalancerClient);
RetryTemplate retryTemplate = buildRetryTemplate(serviceId, request, retryPolicy);
return retryTemplate.execute(context -> {
Request feignRequest = null;
ServiceInstance retrievedServiceInstance = null;
Set<LoadBalancerLifecycle> supportedLifecycleProcessors = LoadBalancerLifecycleValidator
.getSupportedLifecycleProcessors(
loadBalancerClientFactory.getInstances(serviceId, LoadBalancerLifecycle.class),
RetryableRequestContext.class, ResponseData.class, ServiceInstance.class);
String hint = getHint(serviceId);
DefaultRequest<RetryableRequestContext> lbRequest = new DefaultRequest<>(
new RetryableRequestContext(null, buildRequestData(request), hint));
// On retries the policy will choose the server and set it in the context
// and extract the server and update the request being made
if (context instanceof LoadBalancedRetryContext) {
LoadBalancedRetryContext lbContext = (LoadBalancedRetryContext) context;
ServiceInstance serviceInstance = lbContext.getServiceInstance();
if (serviceInstance == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Service instance retrieved from LoadBalancedRetryContext: was null. "
+ "Reattempting service instance selection");
}
ServiceInstance previousServiceInstance = lbContext.getPreviousServiceInstance();
lbRequest.getContext().setPreviousServiceInstance(previousServiceInstance);
supportedLifecycleProcessors.forEach(lifecycle -> lifecycle.onStart(lbRequest));
retrievedServiceInstance = loadBalancerClient.choose(serviceId, lbRequest);
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Selected service instance: %s", retrievedServiceInstance));
}
lbContext.setServiceInstance(retrievedServiceInstance);
}
if (retrievedServiceInstance == null) {
if (LOG.isWarnEnabled()) {
LOG.warn("Service instance was not resolved, executing the original request");
}
org.springframework.cloud.client.loadbalancer.Response<ServiceInstance> lbResponse = new DefaultResponse(
retrievedServiceInstance);
supportedLifecycleProcessors.forEach(lifecycle -> lifecycle
.onComplete(new CompletionContext<ResponseData, ServiceInstance, RetryableRequestContext>(
CompletionContext.Status.DISCARD, lbRequest, lbResponse)));
feignRequest = request;
}
else {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Using service instance from LoadBalancedRetryContext: %s",
retrievedServiceInstance));
}
String reconstructedUrl = loadBalancerClient.reconstructURI(retrievedServiceInstance, originalUri)
.toString();
feignRequest = buildRequest(request, reconstructedUrl);
}
}
org.springframework.cloud.client.loadbalancer.Response<ServiceInstance> lbResponse = new DefaultResponse(
retrievedServiceInstance);
Response response = LoadBalancerUtils.executeWithLoadBalancerLifecycleProcessing(delegate, options,
feignRequest, lbRequest, lbResponse, supportedLifecycleProcessors,
retrievedServiceInstance != null);
int responseStatus = response.status();
if (retryPolicy != null && retryPolicy.retryableStatusCode(responseStatus)) {
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Retrying on status code: %d", responseStatus));
}
byte[] byteArray = response.body() == null ? new byte[] {}
: StreamUtils.copyToByteArray(response.body().asInputStream());
response.close();
throw new LoadBalancerResponseStatusCodeException(serviceId, response, byteArray,
URI.create(request.url()));
}
return response;
}, new LoadBalancedRecoveryCallback<Response, Response>() {
@Override
protected Response createResponse(Response response, URI uri) {
return response;
}
});
}
服务注册与发现
观察Nacos平台,查看到当本地环境启动的时候,sit环境Nacos服务管理–>服务列表中对应服务新增了一个实例。
NacosNamingService#registerInstance()
,注册服务实例并且发送心跳任务
@Override
public void registerInstance(String serviceName, String groupName, Instance instance) throws NacosException {
NamingUtils.checkInstanceIsLegal(instance);
String groupedServiceName = NamingUtils.getGroupedName(serviceName, groupName);
if (instance.isEphemeral()) {
BeatInfo beatInfo = beatReactor.buildBeatInfo(groupedServiceName, instance);
beatReactor.addBeatInfo(groupedServiceName, beatInfo);
}
serverProxy.registerService(groupedServiceName, groupName, instance);
}
NamingProxy#registerService
,封装参数,发送请求。由于观察到的namespaceId
是sit的,而不是uat的,观察namespaceId
的赋值过程。
public void registerService(String serviceName, String groupName, Instance instance) throws NacosException {
NAMING_LOGGER.info("[REGISTER-SERVICE] {} registering service {} with instance: {}", namespaceId, serviceName,
instance);
final Map<String, String> params = new HashMap<String, String>(16);
params.put(CommonParams.NAMESPACE_ID, namespaceId);
params.put(CommonParams.SERVICE_NAME, serviceName);
params.put(CommonParams.GROUP_NAME, groupName);
params.put(CommonParams.CLUSTER_NAME, instance.getClusterName());
params.put("ip", instance.getIp());
params.put("port", String.valueOf(instance.getPort()));
params.put("weight", String.valueOf(instance.getWeight()));
params.put("enable", String.valueOf(instance.isEnabled()));
params.put("healthy", String.valueOf(instance.isHealthy()));
params.put("ephemeral", String.valueOf(instance.isEphemeral()));
params.put("metadata", JacksonUtils.toJson(instance.getMetadata()));
reqApi(UtilAndComs.nacosUrlInstance, params, HttpMethod.POST);
}
NacosNamingService#init
,初始化NamingProxy
private void init(Properties properties) throws NacosException {
ValidatorUtils.checkInitParam(properties);
this.namespace = InitUtils.initNamespaceForNaming(properties);
InitUtils.initSerialization();
initServerAddr(properties);
InitUtils.initWebRootContext(properties);
initCacheDir();
initLogName(properties);
this.serverProxy = new NamingProxy(this.namespace, this.endpoint, this.serverList, properties);
this.beatReactor = new BeatReactor(this.serverProxy, initClientBeatThreadCount(properties));
this.hostReactor = new HostReactor(this.serverProxy, beatReactor, this.cacheDir, isLoadCacheAtStart(properties),
isPushEmptyProtect(properties), initPollingThreadCount(properties));
}
NacosWatch#start
,NacosWatch
实现了SmartLifecycle
,启动的时候会执行。所以是NacosDiscoveryProperties
的配置是错误的,导致Nacos注册错误,Feign调用错误。
private final NacosDiscoveryProperties properties;
@Override
public void start() {
if (this.running.compareAndSet(false, true)) {
EventListener eventListener = listenerMap.computeIfAbsent(buildKey(),
event -> new EventListener() {
@Override
public void onEvent(Event event) {
if (event instanceof NamingEvent) {
List<Instance> instances = ((NamingEvent) event)
.getInstances();
Optional<Instance> instanceOptional = selectCurrentInstance(
instances);
instanceOptional.ifPresent(currentInstance -> {
resetIfNeeded(currentInstance);
});
}
}
});
NamingService namingService = nacosServiceManager
.getNamingService(properties.getNacosProperties());
try {
namingService.subscribe(properties.getService(), properties.getGroup(),
Arrays.asList(properties.getClusterName()), eventListener);
}
catch (Exception e) {
log.error("namingService subscribe failed, properties:{}", properties, e);
}
this.watchFuture = this.taskScheduler.scheduleWithFixedDelay(
this::nacosServicesWatch, this.properties.getWatchDelay());
}
}
排查bootstrap.yml
和远程配置,发现远程配置的是sit环境,bootstrap.yml
中配置的是uat环境,去除远程配置的服务发现配置,问题解决。