diff --git a/README.md b/README.md
index 8efc42aa..575a8a13 100644
--- a/README.md
+++ b/README.md
@@ -155,6 +155,28 @@ System.out.println(resultHexadecimal);
 // "An 😀awesome 😃string with a few 😉emojis!"
 ```
 
+#### To hexadecimal
+
+To replace every emoji found in a string with the hexadecimal escape sequences of its UTF-16 code units, use `EmojiParser#parseToStringHexadecimalWithEscapeSequences(String)` or `EmojiParser#parseToStringHexadecimalWithEscapeSequences(String, FitzpatrickAction)`.
+
+For example:
+
+```java
+String str1 = "Hello World! 😀";
+
+String resultHexadecimal1 = EmojiParser.parseToStringHexadecimalWithEscapeSequences(str1);
+System.out.println(resultHexadecimal1);
+// Prints:
+// "Hello World! \\ud83d\\ude00"
+
+String str2 = "An 😀awesome 😃string with a few 😉emojis!";
+
+String resultHexadecimal2 = EmojiParser.parseToStringHexadecimalWithEscapeSequences(str2);
+System.out.println(resultHexadecimal2);
+// Prints:
+// "An \\ud83d\\ude00awesome \\ud83d\\ude03string with a few \\ud83d\\ude09emojis!"
+```
+
 By default, any Fitzpatrick modifier will be removed. If you want to ignore the Fitzpatrick modifiers, use `EmojiParser#parseToAliases(String, FitzpatrickAction)`. Examples:
 
 ```java
diff --git a/src/main/java/com/vdurmont/emoji/Emoji.java b/src/main/java/com/vdurmont/emoji/Emoji.java
index dfb6924f..d5187cbd 100644
--- a/src/main/java/com/vdurmont/emoji/Emoji.java
+++ b/src/main/java/com/vdurmont/emoji/Emoji.java
@@ -19,6 +19,7 @@ public class Emoji {
   private final String unicode;
   private final String htmlDec;
   private final String htmlHex;
+  private final String stringHex;
 
   /**
    * Constructor for the Emoji.
@@ -47,21 +48,43 @@ protected Emoji(
       int stringLength = getUnicode().length();
       String[] pointCodes = new String[stringLength];
       String[] pointCodesHex = new String[stringLength];
+      String[] pointCodesStringHex = new String[stringLength];
 
       for (int offset = 0; offset < stringLength; ) {
         final int codePoint = getUnicode().codePointAt(offset);
 
         pointCodes[count] = String.format("&#%d;", codePoint);
-        pointCodesHex[count++] = String.format("&#x%x;", codePoint);
+        pointCodesHex[count] = String.format("&#x%x;", codePoint);
+        pointCodesStringHex[count++] = convertEscapeSequence(codePoint);
 
         offset += Character.charCount(codePoint);
       }
       this.htmlDec = stringJoin(pointCodes, count);
       this.htmlHex = stringJoin(pointCodesHex, count);
+      this.stringHex = stringJoin(pointCodesStringHex, count);
     } catch (UnsupportedEncodingException e) {
       throw new RuntimeException(e);
     }
   }
+
+  /**
+   * Converts a code point into the escape sequences of its UTF-16 code units.
+   * Each code unit is written as two backslashes, the letter {@code u} and four
+   * lowercase hexadecimal digits; a supplementary code point yields one sequence
+   * per surrogate.
+   *
+   * @param codePoint the Unicode code point to convert
+   *
+   * @return the concatenated escape sequences of the code point
+   */
+  public String convertEscapeSequence(int codePoint) {
+    StringBuilder escapes = new StringBuilder();
+    for (char codeUnit : Character.toChars(codePoint)) {
+      // Pad to four digits so that code units below 0x1000 keep a fixed-width sequence
+      escapes.append(String.format("\\\\u%04x", (int) codeUnit));
+    }
+    return escapes.toString();
+  }
 
   /**
    * Method to replace String.join, since it was only introduced in java8
@@ -211,4 +234,13 @@ public String toString() {
       ", htmlHex='" + htmlHex + '\'' +
       '}';
   }
+
+  /**
+   * Returns the escape sequences representation of the emoji
+   *
+   * @return the hexadecimal escape sequences of every code point of the emoji
+   */
+  public String getStringHexadecimalWithEscapeSequences() {
+    return this.stringHex;
+  }
 }
diff --git a/src/main/java/com/vdurmont/emoji/EmojiParser.java b/src/main/java/com/vdurmont/emoji/EmojiParser.java
index b6294a47..790c71b6 100644
--- a/src/main/java/com/vdurmont/emoji/EmojiParser.java
+++ b/src/main/java/com/vdurmont/emoji/EmojiParser.java
@@ -4,6 +4,10 @@
 import java.util.Collection;
 import java.util.List;
 
+import com.vdurmont.emoji.EmojiParser.EmojiTransformer;
+import com.vdurmont.emoji.EmojiParser.FitzpatrickAction;
+import
com.vdurmont.emoji.EmojiParser.UnicodeCandidate;
+
 /**
  * Provides methods to parse strings with emojis.
  *
@@ -573,4 +577,53 @@ public enum FitzpatrickAction {
   public interface EmojiTransformer {
     String transform(UnicodeCandidate unicodeCandidate);
   }
+
+  /**
+   * See {@link #parseToStringHexadecimalWithEscapeSequences(String, FitzpatrickAction)} with the
+   * action "PARSE"
+   *
+   * @param input the string to parse
+   *
+   * @return the string with the emojis replaced by their hexadecimal escape
+   * sequences.
+   */
+  public static String parseToStringHexadecimalWithEscapeSequences(String input) {
+    return parseToStringHexadecimalWithEscapeSequences(input, FitzpatrickAction.PARSE);
+  }
+
+  /**
+   * Replaces the emoji's unicode occurrences by their hexadecimal escape
+   * sequences, as returned by
+   * {@link Emoji#getStringHexadecimalWithEscapeSequences()}.
+   *
+   * With the actions PARSE and REMOVE, the replacement is the escape sequences
+   * of the emoji only. With the action IGNORE, the value returned by
+   * {@link UnicodeCandidate#getFitzpatrickUnicode()} is appended unescaped.
+   *
+   * @param input             the string to parse
+   * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
+   *
+   * @return the string with the emojis replaced by their hexadecimal escape
+   * sequences.
+   */
+  public static String parseToStringHexadecimalWithEscapeSequences(
+    String input,
+    final FitzpatrickAction fitzpatrickAction
+  ) {
+    EmojiTransformer emojiTransformer = new EmojiTransformer() {
+      public String transform(UnicodeCandidate unicodeCandidate) {
+        switch (fitzpatrickAction) {
+          default:
+          case PARSE:
+          case REMOVE:
+            return unicodeCandidate.getEmoji().getStringHexadecimalWithEscapeSequences();
+          case IGNORE:
+            return unicodeCandidate.getEmoji().getStringHexadecimalWithEscapeSequences() +
+              unicodeCandidate.getFitzpatrickUnicode();
+        }
+      }
+    };
+
+    return parseFromUnicode(input, emojiTransformer);
+  }
 }
diff --git a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
index 2965cebb..28d454bd 100644
--- a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
+++ b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
@@ -550,4 +550,16 @@ public void parseToAliases_with_first_medal() {
     // THEN
     assertEquals(":first_place_medal:", result);
   }
+
+  @Test
+  public void parseToStringHexadecimalWithEscapeSequences_replaces_the_emojis_by_their_escape_sequences() {
+    // GIVEN
+    String str = "Hello World! 😀";
+
+    // WHEN
+    String result = EmojiParser.parseToStringHexadecimalWithEscapeSequences(str);
+
+    // THEN
+    assertEquals("Hello World! \\\\ud83d\\\\ude00", result);
+  }
 }