Implement version 2.0.0 #5
@ -3,7 +3,7 @@ package com.marvinelsen.chinese.transliteration
|
|||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
@Suppress("MagicNumber", "MaximumLineLength", "MaxLineLength")
|
@Suppress("MagicNumber", "MaximumLineLength", "MaxLineLength")
|
||||||
data class Syllable(
|
data class PinyinSyllable(
|
||||||
val pinyinSyllableWithoutTone: String,
|
val pinyinSyllableWithoutTone: String,
|
||||||
val tone: Tone,
|
val tone: Tone,
|
||||||
) {
|
) {
|
||||||
@ -19,7 +19,7 @@ data class Syllable(
|
|||||||
.substring(0, pinyinSyllable.lastIndex)
|
.substring(0, pinyinSyllable.lastIndex)
|
||||||
.lowercase() in pinyinToZhuyin
|
.lowercase() in pinyinToZhuyin
|
||||||
|
|
||||||
fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): Syllable {
|
fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): PinyinSyllable {
|
||||||
val pinyinWithoutNumber = pinyinWithToneNumber.substring(0, pinyinWithToneNumber.lastIndex)
|
val pinyinWithoutNumber = pinyinWithToneNumber.substring(0, pinyinWithToneNumber.lastIndex)
|
||||||
val lastCharacter = pinyinWithToneNumber.last()
|
val lastCharacter = pinyinWithToneNumber.last()
|
||||||
|
|
||||||
@ -33,18 +33,18 @@ data class Syllable(
|
|||||||
pinyinWithoutNumber.lowercase() in pinyinToZhuyin
|
pinyinWithoutNumber.lowercase() in pinyinToZhuyin
|
||||||
) { "'$pinyinWithoutNumber' is not a valid Pinyin syllable." }
|
) { "'$pinyinWithoutNumber' is not a valid Pinyin syllable." }
|
||||||
|
|
||||||
return Syllable(
|
return PinyinSyllable(
|
||||||
pinyinSyllableWithoutTone = pinyinWithoutNumber,
|
pinyinSyllableWithoutTone = pinyinWithoutNumber,
|
||||||
tone = Tone.fromDigit(lastCharacter)
|
tone = Tone.fromDigit(lastCharacter)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun fromZhuyin(zhuyin: String): Syllable {
|
fun fromZhuyin(zhuyin: String): PinyinSyllable {
|
||||||
val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")
|
val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")
|
||||||
|
|
||||||
require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }
|
require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }
|
||||||
|
|
||||||
return Syllable(
|
return PinyinSyllable(
|
||||||
zhuyinToPinyin[zhuyinWithoutToneMark]!!,
|
zhuyinToPinyin[zhuyinWithoutToneMark]!!,
|
||||||
Tone.fromZhuyinToneMarkOrNull(zhuyin.last()) ?: Tone.fromZhuyinToneMarkOrNull(zhuyin.first())
|
Tone.fromZhuyinToneMarkOrNull(zhuyin.last()) ?: Tone.fromZhuyinToneMarkOrNull(zhuyin.first())
|
||||||
?: Tone.FIRST
|
?: Tone.FIRST
|
@ -0,0 +1,119 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration
|
||||||
|
|
||||||
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
|
import io.kotest.datatest.withData
|
||||||
|
import io.kotest.matchers.shouldBe
|
||||||
|
|
||||||
|
class PinyinSyllableTest : ShouldSpec({
|
||||||
|
context("from pinyin with tone numbers") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||||
|
"sheng1" to PinyinSyllable("sheng", Tone.FIRST),
|
||||||
|
"zhi2" to PinyinSyllable("zhi", Tone.SECOND),
|
||||||
|
"ka3" to PinyinSyllable("ka", Tone.THIRD),
|
||||||
|
"yao4" to PinyinSyllable("yao", Tone.FORTH),
|
||||||
|
"me5" to PinyinSyllable("me", Tone.FIFTH),
|
||||||
|
"Me5" to PinyinSyllable("Me", Tone.FIFTH),
|
||||||
|
"nv3" to PinyinSyllable("nv", Tone.THIRD),
|
||||||
|
"nü3" to PinyinSyllable("nü", Tone.THIRD),
|
||||||
|
"nu:3" to PinyinSyllable("nu:", Tone.THIRD),
|
||||||
|
"r5" to PinyinSyllable("r", Tone.FIFTH),
|
||||||
|
"R5" to PinyinSyllable("R", Tone.FIFTH),
|
||||||
|
"er2" to PinyinSyllable("er", Tone.SECOND),
|
||||||
|
"Er2" to PinyinSyllable("Er", Tone.SECOND),
|
||||||
|
) { (pinyinWithNumber, expectedSyllable) ->
|
||||||
|
PinyinSyllable.fromPinyinWithToneNumber(pinyinWithNumber) shouldBe expectedSyllable
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("from zhuyin") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||||
|
"ㄕㄥ" to PinyinSyllable("sheng", Tone.FIRST),
|
||||||
|
"ㄓˊ" to PinyinSyllable("zhi", Tone.SECOND),
|
||||||
|
"ㄎㄚˇ" to PinyinSyllable("ka", Tone.THIRD),
|
||||||
|
"ㄧㄠˋ" to PinyinSyllable("yao", Tone.FORTH),
|
||||||
|
"ㄇㄜ˙" to PinyinSyllable("me", Tone.FIFTH),
|
||||||
|
"˙ㄇㄜ" to PinyinSyllable("me", Tone.FIFTH),
|
||||||
|
"ㄋㄩˇ" to PinyinSyllable("nü", Tone.THIRD),
|
||||||
|
) { (zhuyin, expectedSyllable) ->
|
||||||
|
PinyinSyllable.fromZhuyin(zhuyin) shouldBe expectedSyllable
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("from invalid pinyin with tone numbers") {
|
||||||
|
}
|
||||||
|
|
||||||
|
context("from invalid zhuyin") {
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to zhuyin") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||||
|
PinyinSyllable("sheng", Tone.FIRST) to "ㄕㄥ",
|
||||||
|
PinyinSyllable("zhi", Tone.SECOND) to "ㄓˊ",
|
||||||
|
PinyinSyllable("ka", Tone.THIRD) to "ㄎㄚˇ",
|
||||||
|
PinyinSyllable("yao", Tone.FORTH) to "ㄧㄠˋ",
|
||||||
|
PinyinSyllable("me", Tone.FIFTH) to "˙ㄇㄜ",
|
||||||
|
PinyinSyllable("nü", Tone.THIRD) to "ㄋㄩˇ",
|
||||||
|
PinyinSyllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
|
||||||
|
PinyinSyllable("nv", Tone.THIRD) to "ㄋㄩˇ",
|
||||||
|
PinyinSyllable("r", Tone.FIFTH) to "˙ㄦ",
|
||||||
|
PinyinSyllable("R", Tone.FIFTH) to "˙ㄦ",
|
||||||
|
PinyinSyllable("er", Tone.SECOND) to "ㄦˊ",
|
||||||
|
PinyinSyllable("Er", Tone.SECOND) to "ㄦˊ",
|
||||||
|
) { (syllable, expectedZhuyin) ->
|
||||||
|
syllable.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to pinyin with tone numbers") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||||
|
PinyinSyllable("sheng", Tone.FIRST) to "sheng1",
|
||||||
|
PinyinSyllable("zhi", Tone.SECOND) to "zhi2",
|
||||||
|
PinyinSyllable("ka", Tone.THIRD) to "ka3",
|
||||||
|
PinyinSyllable("yao", Tone.FORTH) to "yao4",
|
||||||
|
PinyinSyllable("me", Tone.FIFTH) to "me5",
|
||||||
|
PinyinSyllable("nü", Tone.THIRD) to "nü3",
|
||||||
|
PinyinSyllable("nu:", Tone.THIRD) to "nu:3",
|
||||||
|
PinyinSyllable("nv", Tone.THIRD) to "nv3",
|
||||||
|
PinyinSyllable("r", Tone.FIFTH) to "r5",
|
||||||
|
PinyinSyllable("R", Tone.FIFTH) to "R5",
|
||||||
|
PinyinSyllable("er", Tone.SECOND) to "er2",
|
||||||
|
PinyinSyllable("Er", Tone.SECOND) to "Er2",
|
||||||
|
) { (syllable, expectedPinyinWithToneNumbers) ->
|
||||||
|
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedPinyinWithToneNumbers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to pinyin with tone marks") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
||||||
|
PinyinSyllable("sheng", Tone.FIRST) to "shēng",
|
||||||
|
PinyinSyllable("zhi", Tone.SECOND) to "zhí",
|
||||||
|
PinyinSyllable("ka", Tone.THIRD) to "kǎ",
|
||||||
|
PinyinSyllable("yao", Tone.FORTH) to "yào",
|
||||||
|
PinyinSyllable("me", Tone.FIFTH) to "me",
|
||||||
|
PinyinSyllable("zhui", Tone.FIRST) to "zhuī",
|
||||||
|
PinyinSyllable("liu", Tone.FIRST) to "liū",
|
||||||
|
PinyinSyllable("nü", Tone.THIRD) to "nǚ",
|
||||||
|
PinyinSyllable("nu:", Tone.THIRD) to "nǚ",
|
||||||
|
PinyinSyllable("nv", Tone.THIRD) to "nǚ",
|
||||||
|
PinyinSyllable("r", Tone.FIFTH) to "r",
|
||||||
|
PinyinSyllable("er", Tone.SECOND) to "ér",
|
||||||
|
PinyinSyllable("Er", Tone.SECOND) to "Ér",
|
||||||
|
) { (syllable, expectedPinyinWithToneMarks) ->
|
||||||
|
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedPinyinWithToneMarks
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to zhuyin with invalid pinyin syllable") {
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to pinyin with tone diacritics with invalid pinyin syllable") {
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to pinyin with tone numbers with invalid pinyin syllable") {
|
||||||
|
}
|
||||||
|
})
|
@ -1,119 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.transliteration
|
|
||||||
|
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
|
||||||
import io.kotest.datatest.withData
|
|
||||||
import io.kotest.matchers.shouldBe
|
|
||||||
|
|
||||||
class SyllableTest : ShouldSpec({
|
|
||||||
context("from pinyin with tone numbers") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
|
||||||
"sheng1" to Syllable("sheng", Tone.FIRST),
|
|
||||||
"zhi2" to Syllable("zhi", Tone.SECOND),
|
|
||||||
"ka3" to Syllable("ka", Tone.THIRD),
|
|
||||||
"yao4" to Syllable("yao", Tone.FORTH),
|
|
||||||
"me5" to Syllable("me", Tone.FIFTH),
|
|
||||||
"Me5" to Syllable("Me", Tone.FIFTH),
|
|
||||||
"nv3" to Syllable("nv", Tone.THIRD),
|
|
||||||
"nü3" to Syllable("nü", Tone.THIRD),
|
|
||||||
"nu:3" to Syllable("nu:", Tone.THIRD),
|
|
||||||
"r5" to Syllable("r", Tone.FIFTH),
|
|
||||||
"R5" to Syllable("R", Tone.FIFTH),
|
|
||||||
"er2" to Syllable("er", Tone.SECOND),
|
|
||||||
"Er2" to Syllable("Er", Tone.SECOND),
|
|
||||||
) { (pinyinWithNumber, expectedSyllable) ->
|
|
||||||
Syllable.fromPinyinWithToneNumber(pinyinWithNumber) shouldBe expectedSyllable
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from zhuyin") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
|
||||||
"ㄕㄥ" to Syllable("sheng", Tone.FIRST),
|
|
||||||
"ㄓˊ" to Syllable("zhi", Tone.SECOND),
|
|
||||||
"ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
|
|
||||||
"ㄧㄠˋ" to Syllable("yao", Tone.FORTH),
|
|
||||||
"ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
|
|
||||||
"˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
|
|
||||||
"ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
|
|
||||||
) { (zhuyin, expectedSyllable) ->
|
|
||||||
Syllable.fromZhuyin(zhuyin) shouldBe expectedSyllable
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from invalid pinyin with tone numbers") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("from invalid zhuyin") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to zhuyin") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
|
|
||||||
Syllable("zhi", Tone.SECOND) to "ㄓˊ",
|
|
||||||
Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
|
|
||||||
Syllable("yao", Tone.FORTH) to "ㄧㄠˋ",
|
|
||||||
Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
|
|
||||||
Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
Syllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
Syllable("nv", Tone.THIRD) to "ㄋㄩˇ",
|
|
||||||
Syllable("r", Tone.FIFTH) to "˙ㄦ",
|
|
||||||
Syllable("R", Tone.FIFTH) to "˙ㄦ",
|
|
||||||
Syllable("er", Tone.SECOND) to "ㄦˊ",
|
|
||||||
Syllable("Er", Tone.SECOND) to "ㄦˊ",
|
|
||||||
) { (syllable, expectedZhuyin) ->
|
|
||||||
syllable.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone numbers") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
Syllable("sheng", Tone.FIRST) to "sheng1",
|
|
||||||
Syllable("zhi", Tone.SECOND) to "zhi2",
|
|
||||||
Syllable("ka", Tone.THIRD) to "ka3",
|
|
||||||
Syllable("yao", Tone.FORTH) to "yao4",
|
|
||||||
Syllable("me", Tone.FIFTH) to "me5",
|
|
||||||
Syllable("nü", Tone.THIRD) to "nü3",
|
|
||||||
Syllable("nu:", Tone.THIRD) to "nu:3",
|
|
||||||
Syllable("nv", Tone.THIRD) to "nv3",
|
|
||||||
Syllable("r", Tone.FIFTH) to "r5",
|
|
||||||
Syllable("R", Tone.FIFTH) to "R5",
|
|
||||||
Syllable("er", Tone.SECOND) to "er2",
|
|
||||||
Syllable("Er", Tone.SECOND) to "Er2",
|
|
||||||
) { (syllable, expectedPinyinWithToneNumbers) ->
|
|
||||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedPinyinWithToneNumbers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone marks") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
|
|
||||||
Syllable("sheng", Tone.FIRST) to "shēng",
|
|
||||||
Syllable("zhi", Tone.SECOND) to "zhí",
|
|
||||||
Syllable("ka", Tone.THIRD) to "kǎ",
|
|
||||||
Syllable("yao", Tone.FORTH) to "yào",
|
|
||||||
Syllable("me", Tone.FIFTH) to "me",
|
|
||||||
Syllable("zhui", Tone.FIRST) to "zhuī",
|
|
||||||
Syllable("liu", Tone.FIRST) to "liū",
|
|
||||||
Syllable("nü", Tone.THIRD) to "nǚ",
|
|
||||||
Syllable("nu:", Tone.THIRD) to "nǚ",
|
|
||||||
Syllable("nv", Tone.THIRD) to "nǚ",
|
|
||||||
Syllable("r", Tone.FIFTH) to "r",
|
|
||||||
Syllable("er", Tone.SECOND) to "ér",
|
|
||||||
Syllable("Er", Tone.SECOND) to "Ér",
|
|
||||||
) { (syllable, expectedPinyinWithToneMarks) ->
|
|
||||||
syllable.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedPinyinWithToneMarks
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to zhuyin with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone diacritics with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
|
|
||||||
context("format to pinyin with tone numbers with invalid pinyin syllable") {
|
|
||||||
}
|
|
||||||
})
|
|
Loading…
Reference in New Issue
Block a user