1. 前端JS相关
- 三元运算
v1 = 条件 ? 值A : 值B; # 如果条件成立v1=值A,不成立v1等于值B
res = 1 === 1 ? 99 : 88 # res=99
- 特殊的逻辑运算
v1 = 1===1 || 2===2 # Ture
v2 = 9 || 14 # 9
v3 = 0 || 15 # 15
v3 = 0 || 15 || "zhangfei" # 15
- 赋值和比较
v1 = 11 === (n=123) # Flase
- 案例:
v1 = 1 > ( n = 2) || 1 === 1 ? 9 :8
# 分析
n = 2
v1 = 9
var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || "";
void 0 -> undifined
# 分析(window.byted_acrawler不为空、window.byted_acrawler.sign不为空)
var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || "";
var o = window.byted_acrawler.sign.call(n,i) || ""
var o = window.byted_acrawler.sign.call(n,i)
- 执行函数
function sign(v1){
// this在函数内部
console.log(v1);
}
// 执行,函数内部this=window全局对象
sign(123) # 123
// 执行函数内部会把第一个参数赋值给 this=123
sign.call(123,456) # 456
// n就会传递给call函数中this
// i当做参数传递
var o = window.byted_acrawler.sign.call(n,i)
var o = window.byted_acrawler.sign(i)
- 扩展
# 之前的javascript不支持面向对象,通过将函数去伪造
function Person(name,age){
this.name=name;
this.age = age
}
obj = new Person("张飞",123)
- 函数的参数
function sign(){
console.log(arguments)
}
sign()
sign(11,22,33)
sign(11,22,44,55)
虽然没定义参数,但是可以传入参数
- 合并对象补充JS环境
v1 = { k1: 123 }
v2 = { k2:99, k3:888}
Objects.assign(v1,v2) # 将第二个字典全部更新到V1;和python字典update很像
console.log(v1) # {k1: 123, k2:99, k3:888}
2.编译js代码
2.1 node.js编译代码
v1.js
function func(arg) {
return arg + 'i666';
}
let data = func("老铁");
console.log(data)
- node编译执行
- python执行执行本地命令:
node v1.js
import os
import subprocess
# 根据自己的操作系统去修改(相当于python的sys.path,加载安装的模块)
os.environ["NODE_PATH"] = "/usr/local/lib/node_modules/"
signature = subprocess.getoutput('node v1.js')
2.2 pyexecjs编译代码
准备环境:
- node.js
- pyexecjs模块
pip install pyexecjs
例如:
v2.js
function func(arg) {
return arg + '666';
}
- 执行js代码
import execjs
import os
os.environ["NODE_PATH"] = "/usr/local/lib/node_modules/"
with open('v2.js', mode='r', encoding='utf-8') as f:
js = f.read()
JS = execjs.compile(js)
sign = JS.call("func", "微信")
print(sign) # 微信666
node.js:电脑上安装上node.js之后(编译器,相当于装CPython解释器), 自动安装npm(第三方包管理器,相当于pip)
2.3 浏览器环境
有些JS的代码你从别的地拿过来执行的时候不成功,因为需要模拟浏览器环境
环境准备:
- node.js
- jsdom(通过后端node+js代码实现伪造浏览器环境)
npm install node-gyp@latest sudo npm explore -g npm -- npm i node-gyp@latest
npm install jsdom -g # -g全局安装
注意:上述安装成功后已可以模拟浏览器环境,由于今天的头条他的内容。
npm install canvas -g
方式一:v10.js
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const resourceLoader = new jsdom.ResourceLoader({
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36",
});
const html = `<!DOCTYPE html><p>Hello world</p>`;
const dom = new JSDOM(html, {
url: "https://www.toutiao.com",
referrer: "https://example.com/",
contentType: "text/html",
resources: resourceLoader,
});
console.log(dom.window.location)
console.log(dom.window.navigator.userAgent)
console.log(dom.window.document.referrer)
import os
import subprocess
# 根据自己的操作系统去修改(相当于python的sys.path,加载安装的模块)
os.environ["NODE_PATH"] = "/usr/local/lib/node_modules/"
res = subprocess.getoutput('node v10.js')
方式二:无法补充环境时
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const resourceLoader = new jsdom.ResourceLoader({
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36"
});
const html = `<!DOCTYPE html><p>Hello world</p>`;
const dom = new JSDOM(html, {
url: "https://www.toutiao.com",
referrer: "https://example.com/",
contentType: "text/html",
resources: resourceLoader,
});
/*
console.log(dom.window.location)
console.log(dom.window.navigator.userAgent)
console.log(dom.window.document.referrer)
*/
window = global;
const params = {
location: {
hash: "",
host: "www.toutiao.com",
hostname: "www.toutiao.com",
href: "https://www.toutiao.com",
origin: "https://www.toutiao.com",
pathname: "/",
port: "",
protocol: "https:",
search: "",
},
navigator: {
appCodeName: "Mozilla",
appName: "Netscape",
appVersion: "5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
cookieEnabled: true,
deviceMemory: 8,
doNotTrack: null,
hardwareConcurrency: 4,
language: "zh-CN",
languages: ["zh-CN", "zh"],
maxTouchPoints: 0,
onLine: true,
platform: "MacIntel",
product: "Gecko",
productSub: "20030107",
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
vendor: "Google Inc.",
vendorSub: "",
webdriver: false
}
};
Object.assign(global,params) # location、navigator设置成了全局变量
# 在下面如果你使用location.href、navigator.appCodeName
我们在上面代码加入window = global这样window.location.href、window.appCodeName也能够获取到
注意:在nodejs中默认代码中会有一个global的关键字(全局变量)。
v1 = 123; # 写了个全局变量,相当于global赋了个值
console.log(global);
global.v1 = 123
global.v2 = 123
global.navigator = {
...
}
console.log(v1,v2);
navigator.userAgent
3.头条
3.1 分析请求
直接发送获取到结果:
import requests
# 这后面的就是我们需要注意的签名_02B4Z6wo009010IJgRwAAIDDtGCIOlEVa8tCLYWAALV5CV7lvAp2MWxOhC9EGgecK8orbBZu.elV57IoxY70Cqa8TI2XW0z.U3dOc84bBFDE83277HsB4oykmNYgkYd-9NbV8enDst.RVEBu76
res = requests.get(
url="https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web&_signature=_02B4Z6wo009010IJgRwAAIDDtGCIOlEVa8tCLYWAALV5CV7lvAp2MWxOhC9EGgecK8orbBZu.elV57IoxY70Cqa8TI2XW0z.U3dOc84bBFDE83277HsB4oykmNYgkYd-9NbV8enDst.RVEBu76",
headers={
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
}
)
print(res.text)
3.2 _signature(寻找签名因为具有失效性,假如操作体育可能就获取不到了)
(null === (n = window.byted_acrawler) || void 0 === n || null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, o)) || ""
n=undefined;
o={url:"https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web"}
var o = window.byted_acrawler.sign.call(n,o);
再简化一下
o={url:"https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web"}
var o = window.byted_acrawler.sign(o);
- 找到sign算法,看看他是内部实现(走不通)。
- 应该有一个js,给全局变量中赋值,
- 整体调用试试看,把JS粘贴过来,找到了这个JS加载完之后赋的值