1.效果演示
1.1 热点问题列表
启动程序后，自动展示热点问题列表(每个问题前带有序号)，并等待终端输入。
1.2 根据序号选择想看的热点问题
输入问题前面的序号并回车，程序会展示该问题的回答内容。
1.3 退出
输入 q 并回车，即可退出程序。
2.源码
2.1 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Zhihu hot-question crawler. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>org.example</groupId>
    <artifactId>zhihu</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Source files are read as UTF-8 (the code contains Chinese string literals). -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Compile at the Java 8 language level and emit Java 8 bytecode. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Gson: JSON parsing for the ranking API response and the question page data. -->
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.9.0</version>
        </dependency>
    </dependencies>
</project>
2.2 Java代码
package org.example;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Console crawler for the Zhihu hourly hot-question ranking.
 *
 * <p>On start-up it downloads the ranking, prints every question prefixed with a
 * 1-based index, and then reads lines from standard input: an index prints the
 * answers embedded in that question's page, {@code q} quits.
 */
public class ZhihuHotHourCrawler {
    /** Ranking endpoint; its JSON response is consumed by {@link #parseHotHtml}. */
    static final String ZHIHU_HOT_URL = "https://www.zhihu.com/api/v4/creators/rank/hot?domain=0&period=hour";
    /** Opening tag of the script element that embeds the question page's JSON state. */
    static final String QUESTION_HTML_MATCH_PREFIX = "<script id=\"js-initialData\" type=\"text/json\">";
    /** Closing tag that terminates the embedded JSON state. */
    static final String QUESTION_HTML_MATCH_SUFFIX = "</script>";

    /** Upper bound for establishing a connection, so a dead host cannot hang the program. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;
    /** Upper bound for a single blocking read on an open connection. */
    private static final int READ_TIMEOUT_MILLIS = 15_000;
    /** Shared parser instance; creating a new Gson per call is unnecessary. */
    private static final Gson GSON = new Gson();

    /**
     * Downloads the body of {@code urlString} as text.
     *
     * <p>Lines are concatenated without line separators, exactly as read.
     *
     * @param urlString absolute URL to fetch; {@code null} or empty yields an empty result
     * @return the response body, or an empty string when the URL is missing or the
     *         request fails (a partial body is never returned)
     */
    public static String getHtml(String urlString) {
        if (urlString == null || urlString.isEmpty()) {
            return "";
        }
        StringBuilder response = new StringBuilder();
        try {
            URLConnection connection = new URL(urlString).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
            // Decode explicitly as UTF-8 instead of the platform default charset, which
            // would garble Chinese text on non-UTF-8 systems (assumes the server
            // responds in UTF-8). try-with-resources closes the stream on every path.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                }
            }
        } catch (IOException e) {
            System.err.println("请求失败: " + urlString + " (" + e + ")");
            return "";
        }
        return response.toString();
    }

    /**
     * Parses the ranking response and records every hot question.
     *
     * <p>Entries lacking a question title or URL are skipped. When the response is
     * empty, is not valid JSON, or has no {@code data} array, both maps are left
     * untouched.
     *
     * @param hotHtml       JSON text returned by {@link #ZHIHU_HOT_URL}
     * @param titleAndUrl   output map: question title to question URL
     * @param indexAndTitle output map: 1-based index (as a string) to question title
     */
    public static void parseHotHtml(String hotHtml, Map<String, String> titleAndUrl, Map<String, String> indexAndTitle) {
        JsonObject root = parseJsonObject(hotHtml);
        if (root == null || !root.has("data") || !root.get("data").isJsonArray()) {
            return;
        }
        int index = 1;
        for (JsonElement item : root.getAsJsonArray("data")) {
            JsonObject question = childObject(item, "question");
            String questionUrl = stringMember(question, "url");
            String questionTitle = stringMember(question, "title");
            if (questionUrl == null || questionTitle == null) {
                continue;
            }
            titleAndUrl.put(questionTitle, questionUrl);
            indexAndTitle.put(String.valueOf(index), questionTitle);
            index++;
        }
    }

    /**
     * Strips everything between a {@code <} and the next {@code >}, inclusive.
     *
     * <p>A {@code <} that is never closed is not a tag: it and the text after it are
     * kept verbatim. The input is scanned once, left to right.
     *
     * @param content text that may contain HTML tags; {@code null} is treated as empty
     * @return {@code content} with all tags removed
     */
    public static String removeHtmlTag(String content) {
        if (content == null) {
            return "";
        }
        StringBuilder text = new StringBuilder(content.length());
        int cursor = 0;
        while (cursor < content.length()) {
            int tagStart = content.indexOf('<', cursor);
            if (tagStart < 0) {
                break;
            }
            int tagEnd = content.indexOf('>', tagStart);
            if (tagEnd < 0) {
                // Unclosed '<': no tag to remove, keep the remainder as literal text.
                break;
            }
            text.append(content, cursor, tagStart);
            cursor = tagEnd + 1;
        }
        text.append(content, cursor, content.length());
        return text.toString();
    }

    /**
     * Extracts the JSON state embedded in a question page and prints each answer
     * as {@code A<n>: <text>} with HTML tags removed.
     *
     * <p>When the page does not contain the expected script element or answer data,
     * a message is written to standard error and nothing is printed.
     *
     * @param questionHtml HTML of a question page, as returned by {@link #getHtml}
     */
    public static void parseQuestionHtml(String questionHtml) {
        JsonObject answers = null;
        if (questionHtml != null) {
            int prefixIndex = questionHtml.indexOf(QUESTION_HTML_MATCH_PREFIX);
            if (prefixIndex >= 0) {
                int jsonStart = prefixIndex + QUESTION_HTML_MATCH_PREFIX.length();
                int suffixIndex = questionHtml.indexOf(QUESTION_HTML_MATCH_SUFFIX, jsonStart);
                if (suffixIndex >= 0) {
                    JsonObject root = parseJsonObject(questionHtml.substring(jsonStart, suffixIndex));
                    JsonObject entities = childObject(childObject(root, "initialState"), "entities");
                    answers = childObject(entities, "answers");
                }
            }
        }
        if (answers == null) {
            System.err.println("未能从问题页面中解析出回答数据");
            return;
        }
        int answerNum = 1;
        for (Map.Entry<String, JsonElement> entry : answers.entrySet()) {
            JsonElement answer = entry.getValue();
            String content = answer.isJsonObject() ? stringMember(answer.getAsJsonObject(), "content") : null;
            if (content == null) {
                continue;
            }
            System.out.println("A" + answerNum + ": " + removeHtmlTag(content));
            answerNum++;
        }
    }

    /**
     * Entry point: prints the hot-question list, then serves index look-ups from
     * standard input until {@code q} is entered or the input ends.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<String, String> titleAndUrl = new LinkedHashMap<>();
        Map<String, String> indexAndTitle = new LinkedHashMap<>();
        parseHotHtml(getHtml(ZHIHU_HOT_URL), titleAndUrl, indexAndTitle);
        if (indexAndTitle.isEmpty()) {
            System.err.println("未获取到热点问题,请稍后重试");
            return;
        }
        for (Map.Entry<String, String> entry : indexAndTitle.entrySet()) {
            System.out.println(entry.getKey() + "." + entry.getValue());
        }
        // One Scanner for the whole session: a Scanner buffers its input, so creating
        // a new one per iteration can drop lines that were already read ahead.
        Scanner scanner = new Scanner(System.in);
        while (true) {
            System.out.print("请输入序号:");
            if (!scanner.hasNextLine()) {
                break; // end of input (e.g. piped stdin exhausted)
            }
            String nextLine = scanner.nextLine().trim();
            if (nextLine.equals("q")) {
                break;
            }
            String title = indexAndTitle.get(nextLine);
            if (title == null) {
                System.out.println("无效的序号,请重新输入(输入q退出)");
                continue;
            }
            parseQuestionHtml(getHtml(titleAndUrl.get(title)));
        }
    }

    /** Parses {@code json} into an object; returns null for empty, malformed or non-object input. */
    private static JsonObject parseJsonObject(String json) {
        if (json == null || json.isEmpty()) {
            return null;
        }
        try {
            return GSON.fromJson(json, JsonObject.class);
        } catch (JsonParseException e) {
            System.err.println("JSON解析失败: " + e.getMessage());
            return null;
        }
    }

    /** Returns the object-valued member {@code member} of {@code parent}, or null when absent or not an object. */
    private static JsonObject childObject(JsonElement parent, String member) {
        if (parent == null || !parent.isJsonObject()) {
            return null;
        }
        JsonElement child = parent.getAsJsonObject().get(member);
        return (child != null && child.isJsonObject()) ? child.getAsJsonObject() : null;
    }

    /** Returns the primitive member {@code member} of {@code parent} as a string, or null when absent or not a primitive. */
    private static String stringMember(JsonObject parent, String member) {
        if (parent == null) {
            return null;
        }
        JsonElement value = parent.get(member);
        return (value != null && value.isJsonPrimitive()) ? value.getAsString() : null;
    }
}
3.补充
如果代码失效了(例如知乎的接口或页面结构发生变化)，可以留言，我会更新代码(如果有时间的话😂)。