Skip to content
This repository was archived by the owner on Aug 10, 2024. It is now read-only.

Commit 46735ab

Browse files
committed
2 parents f9af9e3 + d5b50ff commit 46735ab

9 files changed

Lines changed: 272 additions & 43 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# RSParser
22

3-
This framework was developed for [NetNewsWire](https://github.com/brentsimmons/NetNewsWire) and is made available here for developers who just need the parsing code. It has no depencies that aren’t provided by the system.
3+
This framework was developed for [NetNewsWire](https://github.com/brentsimmons/NetNewsWire) and is made available here for developers who just need the parsing code. It has no dependencies that aren’t provided by the system.
44

55
_Update 6 Feb. 2018_: RSParser is now a CocoaPod, with the much-appreciated help of [Silver Fox](https://github.com/dcilia). (We _think_ it worked, anyway. Looked like it did.)
66

Sources/ObjC/NSData+RSParser.m

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,6 @@ - (BOOL)isProbablyRSSInJSON {
5858

5959
- (BOOL)isProbablyRSS {
6060

61-
if (bytesStartWithRSS(self.bytes, self.length) || bytesStartWithRDF(self.bytes, self.length)) { // Macworld’s RSS feed does not start with xml header.
62-
return YES;
63-
}
64-
if (![self isProbablyXML]) {
65-
return NO;
66-
}
67-
6861
if (didFindString("<rss", self.bytes, self.length) || didFindString("<rdf:RDF", self.bytes, self.length)) {
6962
return YES;
7063
}
@@ -75,13 +68,6 @@ - (BOOL)isProbablyRSS {
7568

7669
- (BOOL)isProbablyAtom {
7770

78-
if (bytesStartWithAtom(self.bytes, self.length)) { // https://research.swtch.com/feed.atom does not start with xml header.
79-
return YES;
80-
}
81-
if (![self isProbablyXML]) {
82-
return NO;
83-
}
84-
8571
return didFindString("<feed", self.bytes, self.length);
8672
}
8773

@@ -151,18 +137,3 @@ static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {
151137

152138
return bytesStartWithStringIgnoringWhitespace("<?xml", bytes, numberOfBytes);
153139
}
154-
155-
static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes) {
156-
157-
return bytesStartWithStringIgnoringWhitespace("<rss", bytes, numberOfBytes);
158-
}
159-
160-
static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes) {
161-
162-
return bytesStartWithStringIgnoringWhitespace("<rdf:RDF", bytes, numberOfBytes);
163-
}
164-
165-
static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes) {
166-
167-
return bytesStartWithStringIgnoringWhitespace("<feed", bytes, numberOfBytes);
168-
}

Sources/ObjC/RSHTMLMetadataParser.m

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ @interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>
2424
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
2525
@property (nonatomic) NSMutableArray *tags;
2626
@property (nonatomic) BOOL didFinishParsing;
27+
@property (nonatomic) BOOL shouldScanPastHeadSection;
2728

2829
@end
2930

@@ -55,6 +56,11 @@ - (instancetype)initWithParserData:(ParserData *)parserData {
5556
_parserData = parserData;
5657
_tags = [NSMutableArray new];
5758

59+
// YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section.
60+
// This allows for a special case where we continue to scan after the head section.
61+
// (Yes, this match could yield false positives, but it’s harmless.)
62+
_shouldScanPastHeadSection = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch].location != NSNotFound;
63+
5864
[self parse];
5965

6066
return self;
@@ -121,7 +127,7 @@ - (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)
121127
return;
122128
}
123129

124-
if (RSSAXEqualTags(localName, kBody, kBodyLength)) {
130+
if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) {
125131
self.didFinishParsing = YES;
126132
return;
127133
}

Tests/RSParserTests/AtomParserTests.swift

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,7 @@ class AtomParserTests: XCTestCase {
4444

4545
for article in parsedFeed.items {
4646

47-
XCTAssertNotNil(article.externalURL)
48-
49-
if !article.title!.hasPrefix("") {
50-
XCTAssertNotNil(article.url)
51-
XCTAssert(article.url!.hasPrefix("https://daringfireball.net/"))
52-
}
47+
XCTAssertNotNil(article.url)
5348

5449
XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/"))
5550

Tests/RSParserTests/FeedParserTypeTests.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,12 @@ class FeedParserTypeTests: XCTestCase {
106106
XCTAssertTrue(type == .rss)
107107
}
108108

109+
func testPHPXML() {
110+
let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/")
111+
let type = feedType(d)
112+
XCTAssertTrue(type == .rss)
113+
}
114+
109115
// MARK: Atom
110116

111117
func testDaringFireballAtomType() {

Tests/RSParserTests/HTMLMetadataTests.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,16 @@ class HTMLMetadataTests: XCTestCase {
139139
let imageURL = twitterData.imageURL!
140140
XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177")
141141
}
142+
143+
func testYouTube() {
144+
// YouTube is a special case — the feed links appear after the head section, in the body section.
145+
let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks")
146+
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
147+
148+
XCTAssertEqual(metadata.feedLinks.count, 1);
149+
let feedLink = metadata.feedLinks.first!
150+
XCTAssertEqual(feedLink.title, "RSS");
151+
XCTAssertEqual(feedLink.type, "application/rss+xml");
152+
XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ");
153+
}
142154
}

Tests/RSParserTests/RSDateParserTests.swift

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,9 @@ class RSDateParserTests: XCTestCase {
100100
XCTAssertEqual(d, expectedDateResult)
101101
}
102102

103-
func testHighMillisecondDate() {
104-
let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56)
105-
let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00")
106-
XCTAssertEqual(d, expectedDateResult)
107-
}
108-
103+
// func testHighMillisecondDate() {
104+
// let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56)
105+
// let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00")
106+
// XCTAssertEqual(d, expectedDateResult)
107+
// }
109108
}

Tests/RSParserTests/Resources/YouTubeTheVolvoRocks.html

Lines changed: 24 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)