Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions jimmy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@
###########################################################


def get_binaries_folder() -> Path:
    """Return the ``bin`` folder located two directory levels above this module.

    The location is derived from ``__file__``, so it follows the module
    wherever it ends up. For how ``__file__`` behaves inside a PyInstaller
    bundle, see:
    https://pyinstaller.org/en/stable/runtime-information.html#using-file

    This function only computes the path; it neither creates the folder nor
    checks that it exists.

    NOTE(review): presumably the CLI and TUI entry points prepend this folder
    to the executable search path so local/PyInstaller binaries are found
    first -- confirm against the callers.

    Returns:
        Path to the ``bin`` folder next to the package directory.
    """
    # First ``parent`` is the directory holding this module, second one is
    # the directory above it, which is where ``bin`` is expected to live.
    base_dir = Path(__file__).parent.parent
    return base_dir / "bin"


@dataclasses.dataclass
class Config:
interface: str = "tui"
Expand Down
14 changes: 8 additions & 6 deletions jimmy/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class BaseConverter(abc.ABC):
def __init__(self, config: common.Config, *_args, **_kwargs):
self._config = config

self.pandoc = jimmy.md_lib.convert.MarkupConverter()

accepted_inputs = jimmy.variables.FORMAT_REGISTRY.get(config.format)
self.accepted_extensions = accepted_inputs["accepted_extensions"] # type: ignore[index]
self.accept_folder = accepted_inputs["accept_folder"] # type: ignore[index]
Expand Down Expand Up @@ -239,7 +241,7 @@ def convert_note(self, file_: Path, parent: imf.Notebook):
format_ = file_.suffix.lower()[1:]
match format_:
case "adoc" | "asciidoc" | "asciidoctor":
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
note_imf.body = self.pandoc.markup_to_markdown(
file_.read_text(encoding="utf-8"),
pwd=file_.parent,
format_="asciidoc",
Expand All @@ -252,7 +254,7 @@ def convert_note(self, file_: Path, parent: imf.Notebook):
extra_args=["--shift-heading-level-by=1"],
)
case "eml" | "mht" | "mhtml":
note_imf = jimmy.md_lib.eml.eml_to_note(file_, self.resource_folder)
note_imf = jimmy.md_lib.eml.eml_to_note(file_, self.resource_folder, self.pandoc)
parent.child_notes.append(note_imf)
return # don't use the common conversion
case "fountain":
Expand Down Expand Up @@ -285,7 +287,7 @@ def convert_note(self, file_: Path, parent: imf.Notebook):
note_imf.body = file_.read_text(encoding="utf-8")
case "docx" | "odt":
# binary format, supported by pandoc
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
note_imf.body = self.pandoc.markup_to_markdown(
file_.read_bytes(),
pwd=file_.parent,
format_=format_,
Expand All @@ -296,15 +298,15 @@ def convert_note(self, file_: Path, parent: imf.Notebook):
root_tag = root.tag.rpartition("}")[-1] # strip namespace
match root_tag:
case "endnote" | "mediawiki" | "opml": # TODO: endnotexml and opml example
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
note_imf.body = self.pandoc.markup_to_markdown(
file_.read_text(encoding="utf-8"),
pwd=file_.parent,
format_=root_tag,
resource_folder=self.resource_folder,
)
# TODO: docbook
# case "book":
# note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
# note_imf.body = self.pandoc.markup_to_markdown(
# file_.read_text(encoding="utf-8"),
# pwd=file_.parent,
# format_="docbook",
Expand All @@ -314,7 +316,7 @@ def convert_note(self, file_: Path, parent: imf.Notebook):
note_imf.body = file_.read_text(encoding="utf-8")
case _: # last resort
pandoc_format = jimmy.md_lib.convert.PANDOC_INPUT_FORMAT_MAP.get(format_, format_)
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
note_imf.body = self.pandoc.markup_to_markdown(
file_.read_text(encoding="utf-8"),
pwd=file_.parent,
format_=pandoc_format,
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/anki.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def convert_note(self, note_index, db_row, models, media_dict, note_deck_id_map)
# + "\n\n"
# + replace(template["afmt"], template_replacements)
# )
# body = jimmy.md_lib.convert.markup_to_markdown(back)
# body = self.pandoc.markup_to_markdown(back)
body_md = "\n".join([f"- {key}: {value}" for key, value in template_replacements.items()])
# cleanup
body_md = (
Expand Down
3 changes: 1 addition & 2 deletions jimmy/formats/google_keep.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import json

from jimmy import common, converter, intermediate_format as imf
import jimmy.md_lib.convert


class Converter(converter.BaseConverter):
Expand Down Expand Up @@ -39,7 +38,7 @@ def convert_note(self, file_: Path):
if "textContent" in note_keep:
note_imf.body = note_keep["textContent"]
elif (body_html := note_keep.get("textContentHtml")) is not None:
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(body_html, pwd=file_.parent)
note_imf.body = self.pandoc.markup_to_markdown(body_html, pwd=file_.parent)
elif (body_list := note_keep.get("listContent")) is not None:
# task list
list_items_md = []
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def convert_note(self, file_: Path, parent: imf.Notebook):

note_imf = imf.Note(title, source_application=self.format, original_id=title)

note_imf.body = jimmy.md_lib.convert.markup_to_markdown(
note_imf.body = self.pandoc.markup_to_markdown(
note_html,
pwd=temp_folder_note,
custom_filter=[
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/notion.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def convert_note(self, item: Path, relative_parent_path: str, parent_notebook: i
body = item.read_text(encoding="utf-8")
if item.suffix.lower() == ".html":
# html, else markdown
body = jimmy.md_lib.convert.markup_to_markdown(
body = self.pandoc.markup_to_markdown(
body,
pwd=item.parent,
custom_filter=[jimmy.md_lib.html_filter.notion_streamline_lists],
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/onenote.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def convert_note(self, page: Path, parent: imf.Notebook):
self.extract_metadata(soup)

# TODO: Strip title and extract date. This could be done in one2html already.
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(str(soup), pwd=page.parent)
note_imf.body = self.pandoc.markup_to_markdown(str(soup), pwd=page.parent)

note_imf.resources, note_imf.note_links = self.handle_markdown_links(note_imf.body, page)

Expand Down
4 changes: 1 addition & 3 deletions jimmy/formats/rednotebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,7 @@ def convert_note(self, data: dict, date: datetime.date):
# def fix_quotation_marks(match: re.Match):
# return f'""{match.group(1)}.{match.group(2)}""]'
# body_preprocessed = WRONG_QUOTATION_RE.sub(fix_quotation_marks, data["text"])
body = jimmy.md_lib.convert.markup_to_markdown(
data["text"], format_="t2t", standalone=False
)
body = self.pandoc.markup_to_markdown(data["text"], format_="t2t", standalone=False)
body, resources = self.handle_markdown_links(body)
note_imf = imf.Note(
title,
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/synology_note_station.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def convert_note(self, note_id, note_id_title_map):

note_links: imf.NoteLinks = []
if (content_html := note.get("content")) is not None:
content_markdown = jimmy.md_lib.convert.markup_to_markdown(
content_markdown = self.pandoc.markup_to_markdown(
content_html,
custom_filter=[
jimmy.md_lib.html_filter.synology_note_station_fix_checklists,
Expand Down
20 changes: 11 additions & 9 deletions jimmy/formats/tiddlywiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,9 @@


class MarkdownHtmlSeparator(HTMLParser):
def __init__(self, *args, **kwargs):
def __init__(self, pandoc, *args, **kwargs):
super().__init__(*args, **kwargs)
self.pandoc = pandoc
self.active_html_tags = []
self.md = []
self.html = []
Expand Down Expand Up @@ -232,9 +233,7 @@ def handle_data(self, data):

def handle_remaining_html(self):
if self.html:
self.md.append(
jimmy.md_lib.convert.markup_to_markdown("".join(self.html), standalone=False)
)
self.md.append(self.pandoc.markup_to_markdown("".join(self.html), standalone=False))
self.html = []

def get_md(self) -> str:
Expand All @@ -248,14 +247,17 @@ def get_md(self) -> str:
return "".join(self.md)


def wikitext_html_to_md(wikitext_html: str) -> str:
def wikitext_html_to_md(wikitext_html: str, pandoc) -> str:
# convert wikitext + HTML to markdown + HTML
if not wikitext_html.strip():
return wikitext_html

# TODO: slow
md_html = jimmy.md_lib.tiddlywiki.wikitext_to_md(wikitext_html)

# convert remaining HTML to markdown
# Wikitext can contain HTML: https://tiddlywiki.com/#HTML%20in%20WikiText
parser = MarkdownHtmlSeparator()
parser = MarkdownHtmlSeparator(pandoc)
try:
parser.feed(md_html)
return parser.get_md()
Expand Down Expand Up @@ -456,12 +458,12 @@ def convert_note_json(self, tiddler):
elif (uri := tiddler.get("_canonical_uri")) is not None:
body = jimmy.md_lib.links.make_link(title, uri)
else:
body = wikitext_html_to_md(tiddler.get("text", ""))
body = wikitext_html_to_md(tiddler.get("text", ""), self.pandoc)
self.logger.warning(f"Unhandled attachment type {mime}")
elif mime == "application/json":
body = "```\n" + tiddler.get("text", "") + "\n```"
else:
body = wikitext_html_to_md(tiddler.get("text", ""))
body = wikitext_html_to_md(tiddler.get("text", ""), self.pandoc)

note_imf = imf.Note(
title,
Expand Down Expand Up @@ -511,7 +513,7 @@ def convert_note(self, file_or_folder: Path):

self.pascalcase_title_note_id_map[common.to_pascal_case(title)] = title

body = wikitext_html_to_md(body_wikitext)
body = wikitext_html_to_md(body_wikitext, self.pandoc)
note_imf = imf.Note(
title,
body,
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/upnote.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def convert_note(self, note_upnote: dict, resource_path: Path):
source_application=self.format,
)

note_body = jimmy.md_lib.convert.markup_to_markdown(
note_body = self.pandoc.markup_to_markdown(
note_upnote["data"]["html"],
custom_filter=[
jimmy.md_lib.html_filter.upnote_add_formula,
Expand Down
5 changes: 2 additions & 3 deletions jimmy/formats/wordpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import xml.etree.ElementTree as ET # noqa: N817

from jimmy import common, converter, intermediate_format as imf
import jimmy.md_lib.convert


def get_text(element, default: str | None = None) -> str | None:
Expand Down Expand Up @@ -55,7 +54,7 @@ def convert_note(self, item, parent_notebook: imf.Notebook, namespaces):

content = get_text(item.find("content:encoded", namespaces))
if content is not None:
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(content)
note_imf.body = self.pandoc.markup_to_markdown(content)
for attachment in item.findall("wp:attachment_url", namespaces):
attachment_text = get_text(attachment)
if attachment_text is None:
Expand All @@ -75,7 +74,7 @@ def convert_note(self, item, parent_notebook: imf.Notebook, namespaces):
)
comment_content = get_text(comment.find("wp:comment_content", namespaces))
if comment_content is not None:
comment_content_md = jimmy.md_lib.convert.markup_to_markdown(
comment_content_md = self.pandoc.markup_to_markdown(
comment_content, standalone=False
)
comments_md.extend(["", f"**{comment_author}**: {comment_content_md}"])
Expand Down
2 changes: 1 addition & 1 deletion jimmy/formats/zoho_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def convert_note(self, file_: Path):

# convert the note body to Markdown
if soup.body is not None:
note_imf.body = jimmy.md_lib.convert.markup_to_markdown(str(soup), pwd=file_.parent)
note_imf.body = self.pandoc.markup_to_markdown(str(soup), pwd=file_.parent)

# resources and internal links
note_imf.resources, note_imf.note_links = self.handle_markdown_links(note_imf.body)
Expand Down
1 change: 1 addition & 0 deletions jimmy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def setup_logging(custom_handlers: list | None = None):
# "markdown_it",
"pypandoc",
"python-markdown",
"urllib3",
"watchdog",
)
]
Expand Down
Loading