diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index 60d5e138fb4..cba33e4f193 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -199,6 +199,14 @@ +
+ {{ render_field(form.application.form.custom_outofstock_strings) }} + Additional custom out-of-stock detection strings (one per line). +
+
+ {{ render_field(form.application.form.custom_instock_strings) }} + Additional custom in-stock detection strings (one per line). +
diff --git a/changedetectionio/content_fetchers/res/stock-not-in-stock.js b/changedetectionio/content_fetchers/res/stock-not-in-stock.js index 95c6df883c6..fa9f230752a 100644 --- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js +++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js @@ -1,8 +1,8 @@ -async () => { +async (customOutOfStockStrings = []) => { function isItemInStock() { // @todo Pass these in so the same list can be used in non-JS fetchers - const outOfStockTexts = [ + const builtInOutOfStockTexts = [ ' أخبرني عندما يتوفر', '0 in stock', 'actuellement indisponible', @@ -110,6 +110,9 @@ async () => { '품절' ]; + // Combine built-in strings with custom strings provided by user + const outOfStockTexts = [...builtInOutOfStockTexts, ...customOutOfStockStrings]; + const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 403e03f9676..8088aacffac 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -774,6 +774,20 @@ class globalSettingsApplicationForm(commonSettingsForm): message="Should contain zero or more attempts")]) ui = FormField(globalSettingsApplicationUIForm) + #@todo better validations? + + custom_outofstock_strings = StringListField('Custom out-of-stock detection strings', + [validators.Optional()], + render_kw={ + "placeholder": "Enter custom out-of-stock strings, one per line\nExample:\nPronto estarán en stock!\nTemporarily out of stock", + "rows": "3"}) + + custom_instock_strings = StringListField('Custom in-stock detection strings', + [validators.Optional()], + render_kw={ + "placeholder": "Enter custom in-stock strings, one per line\nExample:\nDisponible ahora\nIn voorraad", + "rows": "3"}) + class globalSettingsForm(Form): # Define these as FormFields/"sub forms", this way it matches the JSON storage diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index f7478ee921e..8139e32e351 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -38,6 +38,8 @@ class model(dict): # Custom notification content 'api_access_token_enabled': True, 'base_url' : None, + 'custom_instock_strings': [], + 'custom_outofstock_strings' : [], 'empty_pages_are_a_change': False, 'extract_title_as_title': False, 'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), diff --git a/changedetectionio/processors/restock_diff/forms.py b/changedetectionio/processors/restock_diff/forms.py index 39334aa3c71..6978849a6bd 100644 --- a/changedetectionio/processors/restock_diff/forms.py +++ b/changedetectionio/processors/restock_diff/forms.py @@ -1,7 +1,8 @@ from wtforms import ( BooleanField, validators, - FloatField + FloatField, + TextAreaField ) from wtforms.fields.choices import RadioField from wtforms.fields.form import FormField @@ -29,6 +30,7 @@ class RestockSettingsForm(Form): follow_price_changes = BooleanField('Follow price changes', default=True) + class processor_settings_form(processor_text_json_diff_form): restock_settings = FormField(RestockSettingsForm) @@ -74,7 +76,7 @@ def extra_form_content(self): {{ render_field(form.restock_settings.price_change_threshold_percent) }} Price must change more than this % to trigger a change since the first check.
For example, If the product is $1,000 USD originally, 2% would mean it has to change more than $20 since the first check.
- +
""" diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 1fa81058caa..19a14f0c80e 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -143,6 +143,89 @@ def is_between(number, lower=None, upper=None): class perform_site_check(difference_detection_processor): screenshot = None xpath_data = None + + def _normalize_text_for_matching(self, text): + """ + Normalize text for more robust matching: + - Convert to lowercase + - Remove accents/diacritics + - Normalize whitespace + """ + import unicodedata + import re + + if not text: + return "" + + # Convert to lowercase + text = text.lower() + + # Remove accents/diacritics (NFD normalization + filter) + # This converts "é" to "e", "ñ" to "n", etc. + text = unicodedata.normalize('NFD', text) + text = ''.join(char for char in text if unicodedata.category(char) != 'Mn') + + # Normalize whitespace (replace multiple spaces/tabs/newlines with single space) + text = re.sub(r'\s+', ' ', text).strip() + + return text + + def _check_custom_strings(self, text_to_check, custom_strings, string_type="out-of-stock"): + """ + Check text against custom strings (either in-stock or out-of-stock). + Uses normalized matching for better international support. + Returns the matched string if found, None otherwise. + """ + if not custom_strings: + return None + + # Split custom strings by newlines and clean them up + raw_custom_list = [s.strip() for s in custom_strings.split('\n') if s.strip()] + + if not raw_custom_list: + return None + + # Normalize both the page text and custom strings for matching + normalized_text = self._normalize_text_for_matching(text_to_check) + + # Check each custom string against the text + for original_custom_text in raw_custom_list: + normalized_custom_text = self._normalize_text_for_matching(original_custom_text) + + if normalized_custom_text and normalized_custom_text in normalized_text: + logger.debug(f"Custom {string_type} string found: '{original_custom_text}' (normalized: '{normalized_custom_text}')") + return original_custom_text # Return the original user-provided string + + return None + + def _get_combined_instock_strings(self, restock_settings): + """ + Get combined list of built-in and custom in-stock strings. + Custom strings are normalized for better matching. + """ + # Built-in in-stock strings (from the TODO line) + builtin_instock_strings = [ + 'instock', + 'instoreonly', + 'limitedavailability', + 'onlineonly', + 'presale' + ] + + # Add custom in-stock strings if provided + custom_strings = restock_settings.get('custom_instock_strings', '').strip() + if custom_strings: + # Normalize custom strings for better matching + custom_list = [] + for s in custom_strings.split('\n'): + s = s.strip() + if s: + normalized = self._normalize_text_for_matching(s) + if normalized: + custom_list.append(normalized) + builtin_instock_strings.extend(custom_list) + + return builtin_instock_strings def run_changedetection(self, watch): import hashlib @@ -205,6 +288,7 @@ def run_changedetection(self, watch): if itemprop_availability.get('availability'): # @todo: Configurable? + if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [ 'instock', 'instoreonly', @@ -238,6 +322,8 @@ def run_changedetection(self, watch): if self.fetcher.instock_data and itemprop_availability.get('availability') is None: # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. # Careful! this does not really come from chrome/js when the watch is set to plaintext + stock_detection_result = self.fetcher.instock_data + update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.") diff --git a/changedetectionio/tests/restock/test_restock.py b/changedetectionio/tests/restock/test_restock.py index ecee00fabc3..bfb9debb62c 100644 --- a/changedetectionio/tests/restock/test_restock.py +++ b/changedetectionio/tests/restock/test_restock.py @@ -111,3 +111,130 @@ def test_restock_detection(client, live_server, measure_memory_usage): res = client.get(url_for("watchlist.index")) assert b'not-in-stock' in res.data, "Correctly showing NOT IN STOCK in the list after it changed from IN STOCK" + +def test_restock_custom_strings(client, live_server): + """Test custom out-of-stock strings feature""" + + # Set up a response with custom out-of-stock text + test_return_data = """ + + Some initial text
+

Which is across multiple lines

+
+ So let's see what happens.
+
price: $10.99
+
Pronto estarán en stock!
+ + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet') + + # Add watch with custom out-of-stock strings + res = client.post( + url_for("ui.ui_views.form_quick_watch_add"), + data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # Get the UUID so we can configure the watch + uuid = extract_UUID_from_client(client) + + # Configure custom out-of-stock strings + res = client.post( + url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), + data={ + "url": test_url, + 'processor': 'restock_diff', + 'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message' + }, + follow_redirects=True + ) + assert b"Updated watch." in res.data + + # Check that it detects as out of stock + wait_for_all_checks(client) + res = client.get(url_for("watchlist.index")) + assert b'not-in-stock' in res.data, "Should detect custom out-of-stock string" + + # Test custom in-stock strings by changing the content + test_return_data_instock = """ + + Some initial text
+

Which is across multiple lines

+
+ So let's see what happens.
+
price: $10.99
+
Disponible ahora
+ + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data_instock) + + # Update the watch to include custom in-stock strings + res = client.post( + url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), + data={ + "url": test_url, + 'processor': 'restock_diff', + 'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message', + 'restock_settings-custom_instock_strings': 'Disponible ahora\nIn voorraad' + }, + follow_redirects=True + ) + assert b"Updated watch." in res.data + + # Check again - should be detected as in stock now + client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("watchlist.index")) + assert b'not-in-stock' not in res.data, "Should detect custom in-stock string and show as available" + + +def test_restock_custom_strings_normalization(client, live_server): + """Test key normalization scenarios: accents, case, and spaces""" + + # Test page with Spanish text with accents and mixed case + test_return_data = """ + +
price: $10.99
+
¡TEMPORALMENTE AGOTADO!
+ + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet') + + # Add watch + res = client.post( + url_for("ui.ui_views.form_quick_watch_add"), + data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + uuid = extract_UUID_from_client(client) + + # Configure custom string without accents, lowercase, no extra spaces + res = client.post( + url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), + data={ + "url": test_url, + 'processor': 'restock_diff', + 'restock_settings-custom_outofstock_strings': 'temporalmente agotado' + }, + follow_redirects=True + ) + + # Should detect as out of stock despite text differences + wait_for_all_checks(client) + res = client.get(url_for("watchlist.index")) + assert b'not-in-stock' in res.data, "Should match despite accents, case, and spacing differences" + diff --git a/changedetectionio/tests/unit/test_custom_string_normalization.py b/changedetectionio/tests/unit/test_custom_string_normalization.py new file mode 100644 index 00000000000..6323c1af8d3 --- /dev/null +++ b/changedetectionio/tests/unit/test_custom_string_normalization.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +import unittest +from changedetectionio.processors.restock_diff.processor import perform_site_check + + +class TestCustomStringNormalization(unittest.TestCase): + """Test the text normalization logic for custom out-of-stock strings""" + + def setUp(self): + # Create a processor instance for testing + self.processor = perform_site_check(datastore=None, watch_uuid='test') + + def test_normalize_text_for_matching(self): + """Test the _normalize_text_for_matching method""" + + test_cases = [ + # (input, expected_output) + ("Agotado", "agotado"), + ("AGOTADO", "agotado"), # Lowercase + ("Sin stock!", "sin stock!"), # Normalize whitespace + ("Pronto\t\nestarán\nen stock", "pronto estaran en stock"), # Multiple whitespace types + accents + ("¡Temporalmente AGOTADO!", "¡temporalmente agotado!"), # Complex case + ("", ""), # Empty string + ("café", "cafe"), # French accent + ("naïve", "naive"), # Multiple accents + ] + + for input_text, expected in test_cases: + with self.subTest(input_text=input_text): + result = self.processor._normalize_text_for_matching(input_text) + self.assertEqual(result, expected, + f"Failed to normalize '{input_text}' -> expected '{expected}', got '{result}'") + + def test_check_custom_strings_normalization(self): + """Test that custom string matching works with normalization""" + + test_cases = [ + # (page_text, custom_strings, should_match, description) + ("AGOTADO", "agotado", True, "uppercase to lowercase"), + ("Agotado", "agotado", True, "single uppercase to lowercase"), + ("Sin stock!", "sin stock", True, "multiple spaces normalized"), + ("¡Pronto estarán en stock!", "pronto estaran en stock", True, "accents + spaces"), + ("TEMPORALMENTE AGOTADO", "temporalmente agotado", True, "multi-word uppercase"), + ("Available now", "agotado", False, "no match case"), + ("", "agotado", False, "empty text"), + ("agotado", "", False, "empty custom strings"), + ] + + for page_text, custom_strings, should_match, description in test_cases: + with self.subTest(description=description): + result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock") + + if should_match: + self.assertIsNotNone(result, + f"Expected match for '{description}': '{page_text}' should match '{custom_strings}'") + else: + self.assertIsNone(result, + f"Expected no match for '{description}': '{page_text}' should not match '{custom_strings}'") + + def test_check_custom_strings_multiline(self): + """Test that multi-line custom strings work properly""" + + page_text = "Product status: TEMPORALMENTE AGOTADO" + custom_strings = """ + sin stock + agotado + temporalmente agotado + """ + + result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock") + self.assertIsNotNone(result) + self.assertEqual(result.strip(), "temporalmente agotado") + + def test_get_combined_instock_strings_normalization(self): + """Test that custom in-stock strings are normalized properly""" + + restock_settings = { + 'custom_instock_strings': 'Disponible AHORA\nEn Stock\nDISPONÍBLE' + } + + result = self.processor._get_combined_instock_strings(restock_settings) + + # Check that built-in strings are included + self.assertIn('instock', result) + self.assertIn('presale', result) + + # Check that custom strings are normalized and included + self.assertIn('disponible ahora', result) + self.assertIn('en stock', result) + self.assertIn('disponible', result) # accent removed + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file