Skip to content

Commit

Permalink
AO3Bridge: move tags to categories and remove duplicate fic summary (#…
Browse files Browse the repository at this point in the history
…4031)

* AO3Bridge: move tags to categories and remove duplicate fic summary

* [AO3Bridge] Fix tag HTML entity encoding
  • Loading branch information
Phantop committed Mar 28, 2024
1 parent e251e35 commit db984d8
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion bridges/AO3Bridge.php
Expand Up @@ -91,12 +91,26 @@ private function collectList($url)
continue; // discard deleted works
}
$item['title'] = $title->plaintext;
$item['content'] = $element;
$item['uri'] = $title->href;

$strdate = $element->find('div p.datetime', 0)->plaintext;
$item['timestamp'] = strtotime($strdate);

// detach from rest of page because remove() is buggy
$element = str_get_html($element->outertext());
$tags = $element->find('ul.required-tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();
$tags = $element->find('ul.tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();

$item['content'] = implode('', $element->childNodes());

$chapters = $element->find('dl dd.chapters', 0);
// bookmarked series and external works do not have a chapters count
$chapters = (isset($chapters) ? $chapters->plaintext : 0);
Expand All @@ -123,6 +137,10 @@ private function collectList($url)
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
// remove duplicate fic summary
if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) {
$ficsum->remove();
}
$item['content'] .= $html->find('#workskin', 0);
}

Expand Down

0 comments on commit db984d8

Please sign in to comment.