Stahovač archivu článků časopisu Vesmír (vesmir.cz). Vyžaduje aktivní předplatné a jméno/heslo, jinak budou některé články neúplné.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
vesmir-scraper/session.inc

87 lines
2.2 KiB

<?php
/**
 * Evaluates the arithmetic anti-spam question found in a document.
 *
 * Reads the text of the element matching `[for=spamProtectionDisableResult]`
 * and expects it to contain "<digits> plus <digits>" or
 * "<digits> mínus <digits>". Both the question and the computed answer are
 * echoed for diagnostics.
 *
 * @param Html $doc Document object that is queried via find().
 * @return int The sum or difference of the two operands.
 * @throws Exception When the text contains no recognised expression.
 */
function solveChallenge(Html $doc) : int {
    $question = $doc->find('[for=spamProtectionDisableResult]')->text();
    echo "Challenge is: $question\n";

    // Bail out early when the label does not look like "<a> plus|mínus <b>".
    if (!preg_match('/(\d+) (plus|mínus) (\d+)/', $question, $parts)) {
        throw new Exception("Unexpected challenge: $question");
    }

    // Unary plus converts the captured digit strings into numbers.
    $left = +$parts[1];
    $right = +$parts[3];

    // The pattern only admits two operators, so anything but 'plus' subtracts.
    if ($parts[2] == 'plus') {
        $answer = $left + $right;
    } else {
        $answer = $left - $right;
    }

    echo "Result: $answer\n";
    return $answer;
}
/**
 * Posts the login form to vesmir.cz using the configured credentials.
 *
 * The credentials come from the VESMIR_LOGIN and VESMIR_PASSWORD constants,
 * which are defined outside this block. The remaining fields are fixed values
 * sent along with the form.
 *
 * @return mixed Whatever post() returns for the login request.
 */
function login()
{
    echo "----- attempting to login -----\n";

    $form = [
        "username" => VESMIR_LOGIN,
        "password" => VESMIR_PASSWORD,
        "docId" => 9573,
        "doShowdocAction" => "/usrlogon.do",
        "emailLogon" => false,
        "origDocId" => 9573,
    ];

    return post("https://vesmir.cz/usrlogon.do", $form);
}
/**
 * Fetches the spam-protection form, answers its arithmetic question and
 * submits the answer.
 *
 * The `__token` and `hash` values are read from the fetched form and sent
 * back together with the computed result. Success is recognised by the
 * confirmation sentence in the response body.
 *
 * @return mixed The response of the submitting POST request on success.
 * @throws Exception When the confirmation sentence is missing from the
 *                   response (the response is dumped with print_r first), or
 *                   when solveChallenge() cannot parse the question.
 */
function disableAntispam()
{
    echo "----- disabling antispam -----\n";

    $page = get("https://vesmir.cz/components/form/spamprotectiondisable.jsp?backurl=%2Fcz%2Fuzivatel.html");
    $form = new Html($page->content);
    $answer = solveChallenge($form);

    // Field order and values mirror what the form is expected to submit.
    $payload = [
        "result" => $answer,
        "__token" => $form->find('[name=__token]')->value,
        "backurl" => "/cz/uzivatel.html",
        "hash" => $form->find('[name=hash]')->value,
    ];
    $response = post("https://vesmir.cz/components/form/spamprotectiondisable.jsp", $payload);

    // Failure path first: dump the response for debugging, then abort.
    if (!preg_match("/Zadaný výsledek je správný/", $response->content)) {
        print_r($response);
        throw new Exception("Failed to disable antispam.");
    }

    echo "Anti-spam succeeded.\n";
    return $response;
}
/**
 * Placeholder for exporting the cookie file in a form usable by wget.
 *
 * Currently a no-op: the whole implementation is commented out, so calling
 * this function has no effect and it returns nothing.
 */
function dump_cookie_file_for_wget() {
// Disabled implementation, kept for reference: it would copy cookie.txt to
// cookie-wget.txt with every '#HttpOnly_' marker removed.
// echo "Exporting cookie for WGET\n";
// $c = file_get_contents("cookie.txt");
// $c = str_replace('#HttpOnly_', '', $c);
// file_put_contents('cookie-wget.txt', $c);
}
/**
 * Makes sure the current session is authenticated against vesmir.cz.
 *
 * Fetches the account page; if it already contains the log-off link the
 * session is treated as logged in. Otherwise the anti-spam challenge is
 * solved, the account page is requested once more, and the login form is
 * submitted. The login is then verified against the login response and, if
 * the marker is not there, against a fresh fetch of the account page.
 *
 * @return bool Always true; failure is signalled by an exception.
 * @throws Exception When no authenticated page is seen after logging in
 *                   (the login response is dumped with print_r first), or
 *                   when disabling the anti-spam protection fails.
 */
function ensure_logged_in() {
    $accountUrl = "https://vesmir.cz/cz/uzivatel.html";

    // A response counts as authenticated when its body has the log-off link.
    $isLoggedIn = function ($response) {
        return strpos($response->content, '/logoff.do?forward=/cz/') !== false;
    };

    // get a session cookie
    $r = get($accountUrl);
    if ($isLoggedIn($r)) {
        echo "Already logged in!\n";
        dump_cookie_file_for_wget();
        return true;
    }

    echo "Need login!\n";
    disableAntispam();
    // Account page is requested again before posting the login form
    // (kept from the original flow; its response is not inspected).
    get($accountUrl);
    $result = login();

    // Verify against responses obtained AFTER the login attempt. Re-testing
    // the pre-login response $r could never succeed, because it is already
    // known not to contain the log-off link. The second check covers the
    // case where the login response itself is not the authenticated page.
    if ($isLoggedIn($result) || $isLoggedIn(get($accountUrl))) {
        echo "Logged in!\n";
        dump_cookie_file_for_wget();
        return true;
    }

    print_r($result);
    throw new Exception("--- LOGIN FAILED! ---");
}