From 38656103c0bcad9c585eabfb0be5c65382e61663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Acid=20Chicken=20=28=E7=A1=AB=E9=85=B8=E9=B6=8F=29?= Date: Thu, 14 Mar 2019 21:23:15 +0900 Subject: [PATCH] Add angle bracket covered url syntax to mfm (#4483) * Add angle bracket covered url syntax to mfm * Fix path * Fix match * Fix index --- src/mfm/fromHtml.ts | 5 +++-- src/mfm/language.ts | 15 +++++++++++---- src/mfm/normalize.ts | 2 +- src/mfm/parse.ts | 2 +- src/mfm/{types.ts => prelude.ts} | 2 ++ src/mfm/toHtml.ts | 2 +- src/misc/extract-emojis.ts | 2 +- src/misc/extract-hashtags.ts | 2 +- src/misc/extract-mentions.ts | 2 +- test/mfm.ts | 16 +++++++++++++++- 10 files changed, 37 insertions(+), 13 deletions(-) rename src/mfm/{types.ts => prelude.ts} (91%) diff --git a/src/mfm/fromHtml.ts b/src/mfm/fromHtml.ts index 2adaf01371..330ebdde09 100644 --- a/src/mfm/fromHtml.ts +++ b/src/mfm/fromHtml.ts @@ -1,5 +1,6 @@ import { parseFragment, DefaultTreeDocumentFragment } from 'parse5'; import { URL } from 'url'; +import { urlRegex } from './prelude'; export function fromHtml(html: string): string { if (html == null) return null; @@ -14,7 +15,7 @@ export function fromHtml(html: string): string { return text.trim(); - function getText(node: any) { + function getText(node: any): string { if (node.nodeName == '#text') return node.value; if (node.childNodes) { @@ -41,7 +42,7 @@ export function fromHtml(html: string): string { // ハッシュタグ / hrefがない / txtがURL if ((rel && rel.value.match('tag') !== null) || !href || href.value == txt) { - text += txt; + text += txt.match(urlRegex) ? 
txt : `<${txt}>`; // メンション } else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { const part = txt.split('@'); diff --git a/src/mfm/language.ts b/src/mfm/language.ts index 60e4935ed2..7b083b99af 100644 --- a/src/mfm/language.ts +++ b/src/mfm/language.ts @@ -1,5 +1,5 @@ import * as P from 'parsimmon'; -import { createLeaf, createTree } from './types'; +import { createLeaf, createTree, urlRegex } from './prelude'; import { takeWhile, cumulativeSum } from '../prelude/array'; import parseAcct from '../misc/acct/parse'; import { toUnicode } from 'punycode'; @@ -154,9 +154,16 @@ export const mfmLanguage = P.createLanguage({ url: () => { return P((input, i) => { const text = input.substr(i); - const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/); - if (!match) return P.makeFailure(i, 'not a url'); - let url = match[0]; + const match = text.match(urlRegex); + let url: string; + if (!match) { + const match = text.match(/^<(https?:\/\/.*?)>/); + if (!match) + return P.makeFailure(i, 'not a url'); + url = match[1]; + i += 2; + } else + url = match[0]; url = removeOrphanedBrackets(url); if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.')); if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(',')); diff --git a/src/mfm/normalize.ts b/src/mfm/normalize.ts index 732c3085a9..1a2b5bdcd6 100644 --- a/src/mfm/normalize.ts +++ b/src/mfm/normalize.ts @@ -1,6 +1,6 @@ import * as A from '../prelude/array'; import * as S from '../prelude/string'; -import { MfmForest, MfmTree } from './types'; +import { MfmForest, MfmTree } from './prelude'; import { createTree, createLeaf } from '../prelude/tree'; function isEmptyTextTree(t: MfmTree): boolean { diff --git a/src/mfm/parse.ts b/src/mfm/parse.ts index 0343ff09d8..9d60771708 100644 --- a/src/mfm/parse.ts +++ b/src/mfm/parse.ts @@ -1,5 +1,5 @@ import { mfmLanguage } from './language'; -import { MfmForest } from './types'; +import { MfmForest } from './prelude'; import { normalize } from 
'./normalize'; export function parse(source: string): MfmForest { diff --git a/src/mfm/types.ts b/src/mfm/prelude.ts similarity index 91% rename from src/mfm/types.ts rename to src/mfm/prelude.ts index 2c690f317e..7584389184 100644 --- a/src/mfm/types.ts +++ b/src/mfm/prelude.ts @@ -35,3 +35,5 @@ export function createLeaf(type: string, props: any): MfmTree { export function createTree(type: string, children: MfmForest, props: any): MfmTree { return T.createTree({ type, props }, children); } + +export const urlRegex = /^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/; diff --git a/src/mfm/toHtml.ts b/src/mfm/toHtml.ts index e67ccc5035..c676ae6ffc 100644 --- a/src/mfm/toHtml.ts +++ b/src/mfm/toHtml.ts @@ -2,7 +2,7 @@ import { JSDOM } from 'jsdom'; import config from '../config'; import { INote } from '../models/note'; import { intersperse } from '../prelude/array'; -import { MfmForest, MfmTree } from './types'; +import { MfmForest, MfmTree } from './prelude'; export function toHtml(tokens: MfmForest, mentionedRemoteUsers: INote['mentionedRemoteUsers'] = []) { if (tokens == null) { diff --git a/src/misc/extract-emojis.ts b/src/misc/extract-emojis.ts index ba810b5f51..2c57e9a8aa 100644 --- a/src/misc/extract-emojis.ts +++ b/src/misc/extract-emojis.ts @@ -1,4 +1,4 @@ -import { EmojiNode, MfmForest } from '../mfm/types'; +import { EmojiNode, MfmForest } from '../mfm/prelude'; import { preorderF } from '../prelude/tree'; import { unique } from '../prelude/array'; diff --git a/src/misc/extract-hashtags.ts b/src/misc/extract-hashtags.ts index a6b801a7a2..36b2296a76 100644 --- a/src/misc/extract-hashtags.ts +++ b/src/misc/extract-hashtags.ts @@ -1,4 +1,4 @@ -import { HashtagNode, MfmForest } from '../mfm/types'; +import { HashtagNode, MfmForest } from '../mfm/prelude'; import { preorderF } from '../prelude/tree'; import { unique } from '../prelude/array'; diff --git a/src/misc/extract-mentions.ts b/src/misc/extract-mentions.ts index 6ddd31004f..72330d31e1 100644 --- 
a/src/misc/extract-mentions.ts +++ b/src/misc/extract-mentions.ts @@ -1,6 +1,6 @@ // test is located in test/extract-mentions -import { MentionNode, MfmForest } from '../mfm/types'; +import { MentionNode, MfmForest } from '../mfm/prelude'; import { preorderF } from '../prelude/tree'; export default function(mfmForest: MfmForest): MentionNode['props'][] { diff --git a/test/mfm.ts b/test/mfm.ts index fa46c3ff0e..191ee5e0ed 100644 --- a/test/mfm.ts +++ b/test/mfm.ts @@ -12,7 +12,7 @@ import * as assert from 'assert'; import { parse, parsePlain } from '../src/mfm/parse'; import { toHtml } from '../src/mfm/toHtml'; -import { createTree as tree, createLeaf as leaf, MfmTree } from '../src/mfm/types'; +import { createTree as tree, createLeaf as leaf, MfmTree } from '../src/mfm/prelude'; import { removeOrphanedBrackets } from '../src/mfm/language'; function text(text: string): MfmTree { @@ -840,6 +840,20 @@ describe('MFM', () => { text(')') ]); }); + + it('ignore non-ascii characters contained url without angle brackets', () => { + const tokens = parse('https://大石泉すき.example.com'); + assert.deepStrictEqual(tokens, [ + text('https://大石泉すき.example.com') + ]); + }); + + it('match non-ascii characters contained url with angle brackets', () => { + const tokens = parse('<https://大石泉すき.example.com>'); + assert.deepStrictEqual(tokens, [ + leaf('url', { url: 'https://大石泉すき.example.com' }) + ]); + }); }); describe('link', () => {