commit
4760eaf987
@ -0,0 +1,5 @@ |
||||
.idea/ |
||||
out/ |
||||
cookie.txt |
||||
cookie-wget.txt |
||||
vesmir.cz |
@ -0,0 +1,97 @@ |
||||
<?php |
||||
|
||||
// User-Agent header value sent with every request (passed as CURLOPT_USERAGENT in get_or_post()).
const UA = 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0';
||||
|
||||
/**
 * Fetch a page with a GET request and wrap its body in an Html parser.
 *
 * @param string $url address to fetch
 * @return Html parser built from the response body
 * @throws \Exception propagated from get() when the request fails
 */
function get_doc($url) {
    $response = get($url);
    $markup = $response->content;

    return new Html($markup);
}
||||
|
||||
/**
 * Fetch a URL with a GET request and hand back only the response body.
 *
 * @param string $url address to fetch
 * @return string raw response body
 * @throws \Exception propagated from get() when the request fails
 */
function get_file($url) {
    $response = get($url);

    return $response->content;
}
||||
|
||||
/**
 * Perform one HTTP request through cURL and return everything known about the response.
 *
 * Cookies are read from and written back to "cookie.txt" (relative to the
 * current working directory), redirects are followed (at most 10) and every
 * content encoding cURL supports is accepted.
 *
 * @param string $url          address to request
 * @param array  $mergeoptions CURLOPT_* => value pairs that override or extend the defaults
 * @return object the curl_getinfo() fields plus:
 *                - errno / errmsg: cURL error number and message
 *                - headers: lower-cased header name => list of values
 *                - content: response body
 * @throws \Exception when the transfer itself fails or the final status is not 200
 */
function get_or_post($url, $mergeoptions) {
    $defaults = [
        CURLOPT_USERAGENT => UA, // set user agent
        CURLOPT_COOKIEFILE => "cookie.txt", // set cookie file
        CURLOPT_COOKIEJAR => "cookie.txt", // set cookie jar
        CURLOPT_COOKIESESSION => false,
        CURLOPT_RETURNTRANSFER => true, // return web page
        CURLOPT_HEADER => false, // don't return headers
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_ENCODING => "", // handle all encodings
        CURLOPT_AUTOREFERER => true, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
        CURLOPT_TIMEOUT => 120, // timeout on response
        CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
    ];

    // Caller-supplied options win; existing keys keep their position, new keys are appended.
    $options = array_replace($defaults, $mergeoptions);

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);

    // cURL calls this once per received header line. Lines without a colon
    // (status lines, the blank separator) are skipped. Headers of every
    // response in a redirect chain end up in the same map.
    $response_headers = [];
    curl_setopt($ch, CURLOPT_HEADERFUNCTION,
        function ($curl, $line) use (&$response_headers) {
            $len = strlen($line);
            $parts = explode(':', $line, 2);
            if (count($parts) < 2) { // ignore invalid headers
                return $len;
            }

            $name = strtolower(trim($parts[0]));
            $response_headers[$name][] = trim($parts[1]);

            return $len; // cURL expects the number of bytes consumed
        }
    );

    $content = curl_exec($ch);
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    $header['errno'] = $err;
    $header['errmsg'] = $errmsg;
    $header['headers'] = $response_headers;
    $header['content'] = $content;

    // A transfer can break after the server already answered 200 (timeout,
    // truncated body); such a response must not be treated as a success.
    if ($err !== 0 || $content === false) {
        print_r($header);
        throw new \Exception("cURL error $err: $errmsg (status: $header[http_code])");
    }

    if ($header['http_code'] != 200) {
        print_r($header);
        throw new \Exception("Error status: $header[http_code]");
    }

    return (object) $header;
}
||||
|
||||
/**
 * Send a GET request, logging the target URL to stdout.
 *
 * @param string $url address to request
 * @return object response object as produced by get_or_post()
 * @throws \Exception propagated from get_or_post()
 */
function get($url)
{
    echo "Sending GET to: $url\n";

    $overrides = [
        CURLOPT_CUSTOMREQUEST => "GET", // request method
        CURLOPT_POST => false, // make sure POST mode is off
    ];

    return get_or_post($url, $overrides);
}
||||
|
||||
/**
 * Send a POST request, logging the target URL and the submitted fields to stdout.
 *
 * NOTE(review): the field dump includes whatever the caller passes, e.g. the
 * password supplied by login().
 *
 * @param string $url    address to request
 * @param array  $fields form fields handed to CURLOPT_POSTFIELDS
 * @return object response object as produced by get_or_post()
 * @throws \Exception propagated from get_or_post()
 */
function post($url, $fields)
{
    echo "Sending POST to: $url\n";
    print_r($fields);

    $overrides = [
        CURLOPT_CUSTOMREQUEST => "POST", // request method
        CURLOPT_POST => true, // switch cURL to POST mode
        CURLOPT_POSTFIELDS => $fields,
    ];

    return get_or_post($url, $overrides);
}
@ -0,0 +1,186 @@ |
||||
<?php |
||||
|
||||
// When truthy, the Html constructor echoes the markup it parses and find()
// dumps the query context / results before throwing.
const PARSER_DEBUG = 0;
// When truthy, x() echoes every XPath expression it evaluates.
const XPATH_DEBUG = 0;
||||
|
||||
/**
 * Trait DomQuery
 *
 * A small CSS-like selector layer on top of DOMXPath.
 *
 * The using class must provide:
 *   - $this->dom          the DOMDocument being queried
 *   - $this->contextnode  DOMNode the queries are relative to, or null for the whole document
 *   - toXml() : string    used for the debug dump in find()
 */
trait DomQuery {
    /**
     * Return the single node matching the selector.
     *
     * @param string $pat selector, see findAll() for the supported forms
     * @return Node the only match
     * @throws Exception when nothing matches, when more than one node matches,
     *                   or when the selector form is not supported
     */
    public function find(string $pat) : Node
    {
        $els = $this->findAll($pat);
        $count = count($els);

        if ($count === 0) {
            if (PARSER_DEBUG) {
                echo "---- match failed; context: ---\n";
                echo $this->toXml() ."\n";
            }

            throw new \Exception("No match: $pat");
        }

        if ($count > 1) {
            if (PARSER_DEBUG) {
                echo "Query results:\n";
                foreach ($els as $el) {
                    echo $el->toXml()."\n";
                }
            }
            throw new \Exception("Multiple match (".$count."x): $pat");
        }

        return $els[0];
    }

    /**
     * Return every node matching the selector.
     *
     * Supported forms (each optionally prefixed by an element name where shown):
     *   node                          element name
     *   .class, node.class            whole-word class match
     *   #id                           id attribute
     *   [attr=v], node[attr=v]        exact attribute value (quotes around v allowed)
     *   [attr^=v]                     attribute starts with v
     *   [attr$=v]                     attribute ends with v
     *   [attr*=v], [attr~=v]          attribute contains v (both are treated as substring match)
     *   [attr], node[attr]            attribute is present
     *
     * @param string $pat selector
     * @return array|Node[]
     * @throws Exception when the selector form is not supported
     */
    public function findAll(string $pat) : array
    {
        # node
        if (preg_match('/^([a-z0-9_-]+)$/i', $pat, $matches)) {
            return $this->x("//$matches[1]");
        }

        # .class, node.class
        if (preg_match('/^(?P<elem>[a-z0-9_-]*)\.(?P<cls>[a-z0-9_-]+)$/i', $pat, $matches)) {
            $elem = $matches['elem'] ?: '*';
            // pad with spaces so only whole class names match
            return $this->x("//{$elem}[contains(concat(' ',normalize-space(@class),' '),' $matches[cls] ')]");
        }

        # #id -- "@id" addresses the attribute; a bare "id" would test a child element named id
        if (preg_match('/^#(\w+)$/', $pat, $matches)) {
            return $this->x("//*[@id='$matches[1]']");
        }

        # [attr=value], node[attr=value] and the ^= $= *= ~= variants (allows quotes)
        if (preg_match('/^(?P<elem>[a-z0-9_-]*)\[(?P<attr>[a-z0-9_-]+)(?P<op>[$*~^]|)=[\'"]?(?P<val>[^\'"\]]+)[\'"]?\]$/', $pat, $matches)) {
            $elem = $matches['elem'] ?: '*';
            $attr = $matches['attr'];
            $val = $matches['val'];

            switch ($matches['op']) {
                case '':
                    return $this->x("//{$elem}[@$attr='$val']");
                case '^':
                    return $this->x("//{$elem}[starts-with(@$attr, '$val')]");
                case '$':
                    // XPath 1.0 has no ends-with(): compare the attribute's tail instead.
                    // substring() is 1-based, hence the "+ 1"; lengths are computed by
                    // XPath so they are counted in characters, not bytes.
                    return $this->x("//{$elem}[substring(@$attr, string-length(@$attr) - string-length('$val') + 1) = '$val']");
                case '*':
                case '~':
                    return $this->x("//{$elem}[contains(@$attr, '$val')]");
            }
        }

        # [attr], node[attr]
        if (preg_match('/^(?P<elem>[a-z0-9_-]*)\[(?P<attr>[a-z0-9_-]+)\]$/', $pat, $matches)) {
            $elem = $matches['elem'] ?: '*';
            return $this->x("//{$elem}[@$matches[attr]]");
        }

        throw new \Exception("Unknown pattern: $pat");
    }

    /**
     * Evaluate a raw XPath expression.
     *
     * An expression starting with "//" is made relative ("./​/…") when a context
     * node is set, so that it only searches below that node.
     *
     * @param string $x XPath expression
     * @return array|Node[] matches wrapped in Node; empty when the query fails
     */
    public function x(string $x) : array
    {
        $xpath = new DOMXpath($this->dom);
        if (strpos($x, '//') === 0 && $this->contextnode) {
            $x = '.' . $x;
        }

        if (XPATH_DEBUG) echo "\nxpath is: $x\n";

        $elements = $xpath->query($x, $this->contextnode);
        if ($elements === false) {
            // query() reports a malformed expression or invalid context with false, not null
            return [];
        }

        $elems = [];
        foreach ($elements as $e) {
            $elems[] = new Node($this->dom, $e);
        }
        return $elems;
    }
}
||||
|
||||
|
||||
/**
 * A parsed HTML document that can be searched with the DomQuery selectors.
 */
class Html
{
    use DomQuery;

    /** @var DOMDocument the parsed document */
    public $dom;

    /** @var DOMNode|null always null here: queries run against the whole document */
    public $contextnode;

    /**
     * Parse an HTML string.
     *
     * Parser warnings about sloppy markup are collected and discarded instead
     * of being printed. An empty string yields an empty document (every find()
     * on it throws "No match") rather than an error from loadHTML().
     *
     * @param string $html markup to parse
     */
    public function __construct(string $html)
    {
        $dom = new DomDocument();

        if (PARSER_DEBUG) echo "Creating HTML parser from:\n" . $html . "\n\n";

        if ($html !== '') {
            // Route libxml warnings into its internal buffer, then restore the
            // previous setting so other code is not affected.
            $previous = libxml_use_internal_errors(true);
            $dom->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        $this->dom = $dom;
        $this->contextnode = null;
    }

    /**
     * @return string the whole document serialized as XML
     */
    public function toXml() : string
    {
        return $this->dom->saveXml();
    }
}
||||
|
||||
|
||||
/**
 * Wrapper around a single DOM node: attribute access through magic
 * properties ($node->href) plus DomQuery searches relative to the node.
 */
class Node
{
    use DomQuery;

    /** @var DOMDocument document the node belongs to */
    public $dom;

    /** @var DOMNode the wrapped node */
    public $element;

    /** @var DOMNode same node; DomQuery uses it as the query context */
    public $contextnode;

    /**
     * @param DOMDocument $dom     owning document
     * @param DOMNode     $element node to wrap
     */
    public function __construct(DOMDocument $dom, DOMNode $element)
    {
        $this->dom = $dom;
        $this->element = $element;
        $this->contextnode = $element;
    }

    /**
     * Read an attribute of the wrapped element, e.g. $node->href.
     *
     * @param string $name attribute name
     * @return string attribute value; '' when the attribute is missing or the
     *                wrapped node is not an element (text, comment, ...)
     */
    public function __get($name)
    {
        // Only DOMElement has getAttribute(); childNodes() can also yield text nodes.
        if (!$this->element instanceof DOMElement) {
            return '';
        }
        return $this->element->getAttribute($name);
    }

    /**
     * @return string text content of the node, '' when it has none
     */
    public function text() : string
    {
        return $this->element->nodeValue ?? '';
    }

    /**
     * All direct children, including text and comment nodes.
     *
     * @return array|Node[]
     */
    public function childNodes() : array
    {
        $elems = [];
        foreach ($this->element->childNodes as $e) {
            $elems[] = new Node($this->dom, $e);
        }
        return $elems;
    }

    /**
     * The single direct child of this node.
     *
     * @return Node
     * @throws Exception when the node has no children or more than one
     */
    public function childNode() : Node
    {
        $cn = $this->childNodes();
        if (count($cn) > 1) {
            throw new \Exception("More than one childnode.");
        }
        if (count($cn) === 0) {
            throw new \Exception("No childnode.");
        }
        return $cn[0];
    }

    /**
     * @return string this node (with its subtree) serialized as XML
     */
    public function toXml() : string
    {
        return $this->element->ownerDocument->saveXml($this->element);
    }
}
@ -0,0 +1,279 @@ |
||||
<?php |
||||
|
||||
// Article directory names longer than this many bytes are cut at a space (see scrape_issue()).
const MAX_DIR_NAME_LEN = 40;
// When true, articles whose directory already holds clanek.json are not downloaded again.
const SKIP_EXISTING = true;
// Site root; prepended to the relative links found in the pages.
const VESMIR_CZ = 'https://vesmir.cz';
// Credentials submitted by login(); empty by default and must be filled in.
const VESMIR_LOGIN = "";
const VESMIR_PASSWORD = "";
||||
|
||||
require_once "http.inc"; |
||||
require_once "parse.inc"; |
||||
require_once "session.inc"; |
||||
|
||||
/**
 * Download every article of one magazine issue into "$rocnik_dir/$cislo/".
 *
 * For each article row of the issue page a directory "<n> - <title>" is
 * created and filled with:
 *   - thumb.jpg     listing thumbnail (when the row has one)
 *   - orig.html     the article page as downloaded
 *   - attachments   files linked from the article's ".media" box
 *   - img_<n>_*     images referenced from the article body
 *   - clanek.html   cleaned, self-contained article
 *   - clanek.json   metadata; its presence marks the article as done
 *
 * A failure inside one article is printed and the loop moves on to the next.
 *
 * @param string     $rocnik_dir output directory of the volume (year)
 * @param string|int $rocnik     volume, used for logging and metadata
 * @param string|int $cislo      issue number, used for the directory name, logging and metadata
 * @param Html       $doc        parsed issue page
 * @throws Exception when the issue page has no unique ".clanky" container
 */
function scrape_issue($rocnik_dir, $rocnik, $cislo, Html $doc) {
    $cislo_dir = $rocnik_dir . '/' . $cislo;
    if (!file_exists($cislo_dir)) {
        mkdir($cislo_dir);
    }

    echo "\nStahuji cislo $rocnik/$cislo\n\n";

    $n_clanky = $doc->find('.clanky');
    $clankyItems = $n_clanky->findAll('.row');

    $cl_num = 0;
    $aktualni_h4 = null;

    foreach ($clankyItems as $row) {
        // A row holding exactly one <h4> is a section heading, not an article.
        try {
            $hh = $row->find('h4');
            $aktualni_h4 = $hh->text();
            echo "\n~ Skupina clanku: $aktualni_h4 ~\n";
            continue;
        } catch (Exception $e) {
            /* not a heading row, carry on */
        }

        if ($row->class != 'clankyItem row') {
            echo "Skip non-article\n";
            continue;
        }

        try {
            $num = ++$cl_num; // running article number within the issue

            $h3 = $row->find('h3');
            $a = $h3->find('a');
            $clanek_url = VESMIR_CZ . $a->href;
            $clanek_nazev = $a->text();

            // Slug = last path segment without ".html"; fall back to a
            // numbered name when the URL has a different shape.
            if (preg_match('|/([^./]+)\.html$|', $clanek_url, $m)) {
                $slug = $m[1];
            } else {
                $slug = 'clanek-' . $num;
            }

            // Directory name: "<n> - <title>" reduced to filesystem-safe characters
            $fname = $num . ' - ' . $clanek_nazev;
            $fname = mb_ereg_replace("([^\w\s\d\-_~,;\[\]\(\). ])", '', $fname);
            $fname = mb_ereg_replace("([\.]{2,})", '', $fname);

            if (strlen($fname) > MAX_DIR_NAME_LEN) {
                // cut at the last space at or before the limit; a space is a
                // single byte, so a multi-byte character is never split
                $fname = substr($fname, 0, strrpos($fname, ' ', -(strlen($fname) - MAX_DIR_NAME_LEN)));
            }

            // Ensure dir exists
            $clanek_dir = $cislo_dir . '/' . $fname;
            if (!file_exists($clanek_dir)) {
                mkdir($clanek_dir);
            }

            echo "\n- $rocnik/$cislo -> Clanek #$num: $clanek_nazev -\nUrl: $clanek_url\n";

            $perex = null;
            try {
                $perex = $row->find('.perex')->text();
            } catch (Exception $e) {
                echo "No perex. ".$e->getMessage()."\n";
            }

            $thumbfile = null;
            try {
                if (file_exists($clanek_dir . '/thumb.jpg')) {
                    $thumbfile = 'thumb.jpg';
                } else {
                    $thumb = $row->find('img.img-responsive');

                    $f = get_file(VESMIR_CZ . $thumb->src);
                    file_put_contents($clanek_dir . '/thumb.jpg', $f);
                    $thumbfile = 'thumb.jpg';
                }
            } catch (Exception $e) {
                echo "No thumb. ".$e->getMessage()."\n";
            }

            $author_names = [];
            try {
                $authors = $row->find('.authors');
                $author_links = $authors->findAll('a');

                foreach ($author_links as $al) {
                    $author_names[] = $al->text();
                }
            } catch (Exception $e) {
                echo "!! No .authors div\n";
            }

            $merged_authors = implode(', ', $author_names);

            if (SKIP_EXISTING && file_exists($clanek_dir . '/clanek.json')) {
                echo "ARTICLE ALREADY DL'D, SKIP\n";
                continue;
            }

            $resp = get_file($clanek_url);
            file_put_contents($clanek_dir . '/orig.html', $resp);

            $article_doc = new Html($resp);

            $attachments = [];

            // Try to download attachments (pdf version...)
            try {
                $dmedia = $article_doc->find('.media');
                foreach ($dmedia->findAll('a[href]') as $item) {
                    $href = VESMIR_CZ . $item->href;
                    $popis = $item->text();
                    echo "> Downloading: " . $popis . "\n" . $href;

                    // Both values are decided afresh for every attachment so
                    // that nothing leaks in from a previous link or article.
                    $isarticlepdf = ($popis == 'článek ve formátu pdf');
                    $orig_fname = null;

                    $att = get($href);

                    if (isset($att->headers['content-disposition'])) {
                        $first = $att->headers['content-disposition'][0];
                        if (preg_match('/filename="?([^";]+)"?/i', $first, $fm)) {
                            // keep only the final name component: the value is
                            // server-supplied and must not escape $clanek_dir
                            $candidate = basename(trim($fm[1]));
                            if (!in_array($candidate, ['', '.', '..'], true)) {
                                $orig_fname = $candidate;
                            }
                        }
                    }

                    if ($isarticlepdf) {
                        $att_fname = $slug . '.pdf';
                    } elseif ($orig_fname !== null) {
                        $att_fname = $orig_fname;
                    } else {
                        $att_fname = uniqid() . '.pdf'; // it's probably a pdf
                    }

                    file_put_contents($clanek_dir . '/' . $att_fname, $att->content);
                    unset($att->content); // release the downloaded body early

                    $attachments[] = [
                        'url' => $href,
                        'popis' => $popis,
                        'nazev' => $orig_fname,
                        'soubor' => $att_fname,
                    ];
                }

            } catch (Exception $e) {
                echo "Error finding media links: ".$e->getMessage()."\n";
            }

            $adiv = $article_doc->find('div.article');
            $body = $adiv->toXml(); // serialize the body div
            // Drop carriage returns, both raw and as the "&#13;" entity that
            // XML serialization produces for them.
            // NOTE(review): the reviewed copy showed a whitespace-like character
            // here; stripping plain spaces would glue all words together.
            $body = str_replace(['&#13;', "\r"], '', $body);

            // Download the images referenced from the body and point the
            // markup at the local copies.
            $picnum = 0;
            $body = preg_replace_callback('|src="(/images/[^"]+)"|', function ($m) use ($clanek_dir, &$picnum) {
                // the body is serialized XML, so "&" in the URI arrives as "&amp;"
                $uri = html_entity_decode($m[1], ENT_QUOTES, 'UTF-8');
                $url = VESMIR_CZ . $uri;

                $img_slug = basename($uri);
                $img_fname = 'img_' . ($picnum++) . '_' . $img_slug;

                try {
                    $f = get_file($url);
                    file_put_contents($clanek_dir . '/' . $img_fname, $f);
                    return "src=\"".htmlspecialchars($img_fname)."\"";
                } catch (\Exception $e) {
                    echo "Error getting img $uri\n";
                    echo $e->getMessage();
                    echo $e->getTraceAsString();
                    return $m[0]; // no subst.: keep the original src attribute
                }
            }, $body);

            $nazev_e = htmlspecialchars($clanek_nazev);
            $merged_authors_e = htmlspecialchars($merged_authors);

            $cleaned = <<<DOC
<!DOCTYPE html>
<html lang="cs">
<head>
<meta charset="utf-8">
<title>$nazev_e</title>
<link href="../../../style.css" rel="stylesheet" type="text/css" />
</head>
<body>
<h1 class="article-name">$nazev_e</h1>
<p class="authors">$merged_authors_e</p>
<!-- article begin -->

$body

<!-- article end -->
</body>
</html>

DOC;

            file_put_contents($clanek_dir . '/clanek.html', $cleaned);

            $metadata = [
                'nazev' => $clanek_nazev,
                'slug' => $slug,
                'url' => $clanek_url,
                'autori' => $author_names,
                'rocnik' => $rocnik,
                'cislo' => $cislo,
                'poradi' => $cl_num,
                'prilohy' => $attachments,
                'thumb' => $thumbfile,
                'perex' => $perex,
            ];
            // written last: its presence marks the article as completely downloaded
            file_put_contents($clanek_dir . '/clanek.json', json_encode($metadata, JSON_PRETTY_PRINT|JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES));

        } catch (Exception $e) {
            echo $e->getMessage() . "\n" . $e->getTraceAsString() . "\n";
        }
    }
}
||||
|
||||
/**
 * Download every issue of one volume (year) into "<script dir>/out/<year>/".
 *
 * For each cover on the year's archive page the large cover image is saved as
 * "<rocnik>-<cislo>.jpg" (unless already present) and the issue itself is
 * handed to scrape_issue().
 *
 * @param int|string $year volume to download
 * @throws Exception propagated from the HTTP and parsing helpers
 */
function scrape_year($year) {
    $doc = get_doc(VESMIR_CZ . "/cz/casopis/archiv-casopisu/$year/");
    $obalky = $doc->findAll('.vesmirObalka');

    $rocnik_dir = __DIR__ . '/out/' . $year;
    if (!file_exists($rocnik_dir)) {
        // recursive: the parent "out" directory does not exist on a first run
        mkdir($rocnik_dir, 0777, true);
    }

    foreach ($obalky as $obalka) {
        // each cover is expected to be <a href="…/<rocnik>/cislo-<n>/"><img src="…"></a>
        $a = $obalka->childNode();
        $url_cislo = $a->href;

        echo $url_cislo.PHP_EOL;

        if (!preg_match('|/(\d+)/cislo-(\d+)/$|', $url_cislo, $m)) {
            die("weird format $url_cislo");
        }
        echo "== Rocnik $m[1], cislo $m[2] ==\n";
        $rocnik = $m[1];
        $cislo = $m[2];
        $ident = "$rocnik-$cislo";

        $i = $a->childNode();
        $url_thumb = $i->src;

        // request the cover ten times taller than the listing thumbnail
        $url_thumb = str_replace("?h=180", "?h=1800", $url_thumb);

        echo "Casopis URL: $url_cislo\nObalka URL: $url_thumb\n\n";
        $obalka_file = $rocnik_dir . "/$ident.jpg";

        if (!file_exists($obalka_file)) {
            echo "Stahuji obalku...\n";
            $c = get_file(VESMIR_CZ . $url_thumb);
            file_put_contents($obalka_file, $c);
        }

        $c = get_doc(VESMIR_CZ . $url_cislo);

        scrape_issue($rocnik_dir, $rocnik, $cislo, $c);
    }
}
||||
|
||||
|
||||
// Establish (or confirm) the session before any scraping starts.
ensure_logged_in();
// To fetch a single volume only, call e.g. scrape_year(2019) instead of the loop below.

// Walk the archive from the newest volume back to 1994, re-checking the
// session before each year.
$i = 2019;
while ($i >= 1994) {
    ensure_logged_in();
    scrape_year($i);
    $i--;
}
@ -0,0 +1,87 @@ |
||||
<?php |
||||
|
||||
/**
 * Solve the arithmetic anti-spam question found on the page.
 *
 * The question is read from the element carrying
 * for="spamProtectionDisableResult" and must contain
 * "<number> plus <number>" or "<number> mínus <number>".
 *
 * @param Html $doc parsed page containing the challenge
 * @return int result of the addition or subtraction
 * @throws Exception when the challenge element is missing or its text has another form
 */
function solveChallenge(Html $doc) : int {
    $challenge = $doc->find('[for=spamProtectionDisableResult]')->text();
    echo "Challenge is: $challenge\n";

    if (!preg_match('/(\d+) (plus|mínus) (\d+)/', $challenge, $m)) {
        throw new Exception("Unexpected challenge: $challenge");
    }

    // unary plus turns the captured digit strings into numbers
    $left = +$m[1];
    $right = +$m[3];

    if ($m[2] == 'plus') {
        $r = $left + $right;
    } else {
        $r = $left - $right;
    }

    echo "Result: $r\n";
    return $r;
}
||||
|
||||
/**
 * Submit the login form with the credentials from VESMIR_LOGIN / VESMIR_PASSWORD.
 *
 * @return object response object as produced by post()
 * @throws \Exception propagated from post()
 */
function login()
{
    echo "----- attempting to login -----\n";

    $form = [
        "username" => VESMIR_LOGIN,
        "password" => VESMIR_PASSWORD,
        "docId" => 9573,
        "doShowdocAction" => "/usrlogon.do",
        "emailLogon" => false,
        "origDocId" => 9573,
    ];

    return post( "https://vesmir.cz/usrlogon.do", $form);
}
||||
|
||||
/**
 * Pass the site's anti-spam check: load the challenge form, solve the
 * arithmetic question and post the answer back together with the form's
 * hidden "__token" and "hash" values.
 *
 * @return object response to the submitted answer
 * @throws Exception when the confirmation text is missing from the response,
 *                   or propagated from the HTTP / parsing helpers
 */
function disableAntispam()
{
    echo "----- disabling antispam -----\n";

    $page = get("https://vesmir.cz/components/form/spamprotectiondisable.jsp?backurl=%2Fcz%2Fuzivatel.html");

    $doc = new Html($page->content);
    $solved = solveChallenge($doc);

    $token = $doc->find('[name=__token]')->value;
    $hash = $doc->find('[name=hash]')->value;

    $result = post( "https://vesmir.cz/components/form/spamprotectiondisable.jsp", [
        "result" => $solved,
        "__token" => $token,
        "backurl" => "/cz/uzivatel.html",
        "hash" => $hash,
    ]);

    // the site confirms a correct answer with this sentence
    if (!preg_match("/Zadaný výsledek je správný/", $result->content)) {
        print_r($result);

        throw new Exception("Failed to disable antispam.");
    }

    echo "Anti-spam succeeded.\n";
    return $result;
}
||||
|
||||
/**
 * Placeholder for exporting the cookie jar in a form usable by wget.
 *
 * Currently a no-op: the implementation below is commented out. If enabled it
 * would copy cookie.txt to cookie-wget.txt with every "#HttpOnly_" marker removed.
 */
function dump_cookie_file_for_wget() {
// echo "Exporting cookie for WGET\n";
// $c = file_get_contents("cookie.txt");
// $c = str_replace('#HttpOnly_', '', $c);
// file_put_contents('cookie-wget.txt', $c);
}
||||
|
||||
/**
 * Make sure the cookie jar holds a logged-in session, logging in when needed.
 *
 * A page is treated as "logged in" when it contains the log-off link. When
 * the user page does not, the anti-spam check is passed, the login form is
 * submitted and the outcome is verified on the login response (and, failing
 * that, on a fresh copy of the user page).
 *
 * @return bool true once a logged-in session is confirmed
 * @throws Exception when the login cannot be confirmed, or propagated from the helpers
 */
function ensure_logged_in() {
    $user_page = "https://vesmir.cz/cz/uzivatel.html";
    $logoff_marker = '/logoff.do?forward=/cz/';

    // get a session cookie
    $r = get($user_page);
    if (strpos($r->content, $logoff_marker) !== false) {
        echo "Already logged in!\n";
        dump_cookie_file_for_wget();
        return true;
    }

    echo "Need login!\n";

    disableAntispam();
    get($user_page);
    $result = login();

    // Judge the login by what came back AFTER it, not by the page fetched
    // before it. If the login response itself does not show the log-off link,
    // look at the user page once more before giving up.
    if (strpos($result->content, $logoff_marker) === false) {
        $result = get($user_page);
    }

    if (strpos($result->content, $logoff_marker) !== false) {
        echo "Logged in!\n";
        dump_cookie_file_for_wget();
        return true;
    }

    print_r($result);
    throw new Exception("--- LOGIN FAILED! ---");
}
Loading…
Reference in new issue