Initial commit
Some checks failed
Publish package / publish (push) Has been cancelled

This commit is contained in:
Marvin Elsen 2024-09-20 12:35:42 +02:00
commit 2b116b92e0
Signed by: marvinelsen
GPG Key ID: 820672408CC318C2
20 changed files with 1520 additions and 0 deletions

View File

@ -0,0 +1,24 @@
name: Publish package
on:
push:
branches:
- main
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Setup Java
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: 21
- name: Setup Gradle
uses: gradle/actions/setup-gradle@v4
- name: Publish
run: ./gradlew publish
env:
GITEA_TOKEN: ${{ secrets.PACKAGE_TOKEN }}

View File

@ -0,0 +1,26 @@
name: Pull Request
on:
pull_request:
branches:
- main
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Setup Java
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: 21
- name: Setup Gradle
uses: gradle/actions/setup-gradle@v4
- name: Lint
run: ./gradlew detekt
- name: Build
run: ./gradlew build testClasses -x check
- name: Test
run: ./gradlew test

42
.gitignore vendored Normal file
View File

@ -0,0 +1,42 @@
.gradle
build/
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
out/
!**/src/main/**/out/
!**/src/test/**/out/
### Kotlin ###
.kotlin
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

19
LICENSE Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2024 Marvin Elsen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

45
README.md Normal file
View File

@ -0,0 +1,45 @@
# Pinyin Mapper for Kotlin
A Pinyin Mapper written in [Kotlin](https://kotlinlang.org).
## Installation
_Pinyin Mapper for Kotlin_ is available
from [my self-hosted Gitea instance](https://gitea.marvinelsen.com/marvinelsen/cedict-parser).
First, add the repository to your `build.gradle.kts` file:
```kotlin
repositories {
maven {
url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")
}
}
```
Afterwards, add the package dependency to your `build.gradle.kts` file:
```kotlin
dependencies {
    implementation("com.marvinelsen:pinyin:1.0-SNAPSHOT")
}
```
## Usage
```kotlin
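import com.marvinelsen.chinese.transliteration.Syllable
import com.marvinelsen.chinese.transliteration.TransliterationSystem

fun main() {
    val syllable = Syllable.fromPinyinWithToneNumber("sheng1")

    println(syllable.format(TransliterationSystem.ZHUYIN)) // ㄕㄥ
    println(syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)) // sheng1
}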
```
## License
All source code in this repository is licensed under an [MIT license](LICENSE), unless otherwise noted.
Different licenses apply to the following third-party code, data, and files in this repository:
### CC-CEDICT
[CC-CEDICT](https://cc-cedict.org/wiki) is licensed under
a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).

60
build.gradle.kts Normal file
View File

@ -0,0 +1,60 @@
// Gradle build script: Kotlin/JVM library linted with detekt, tested with Kotest on the JUnit Platform,
// and published as a Maven artifact to a Gitea package registry.
plugins {
    alias(libs.plugins.kotlin.jvm)
    alias(libs.plugins.detekt)
    `maven-publish`
}

group = "com.marvinelsen"
version = "1.0-SNAPSHOT"

repositories {
    mavenCentral()
}

dependencies {
    detektPlugins(libs.detekt.formatting)

    testImplementation(libs.kotest.core)
    testImplementation(libs.kotest.assertions)
    testImplementation(libs.kotest.data)
}

tasks.test {
    // Kotest specs are discovered and run through the JUnit Platform.
    useJUnitPlatform()
}

kotlin {
    jvmToolchain(21)
}

publishing {
    publications {
        create<MavenPublication>("maven") {
            groupId = project.group as String
            artifactId = "pinyin"
            version = project.version as String

            from(components["java"])
        }
    }

    repositories {
        maven {
            name = "Gitea"
            url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")

            // The registry authenticates through an `Authorization: token <value>` HTTP header.
            // GITEA_TOKEN is read from the environment (the publish workflow supplies it).
            credentials(HttpHeaderCredentials::class) {
                name = "Authorization"
                value = "token ${System.getenv("GITEA_TOKEN")}"
            }

            // Deliberately NOT setting `isAllowInsecureProtocol = true`: the repository URL is HTTPS,
            // and allowing plain HTTP would let the token above travel unencrypted if the URL were
            // ever changed to `http://`.
            authentication {
                val header by registering(HttpHeaderAuthentication::class)
            }
        }
    }
}

detekt {
    buildUponDefaultConfig = true
    allRules = false
    autoCorrect = true
}

1
gradle.properties Normal file
View File

@ -0,0 +1 @@
kotlin.code.style=official

23
gradle/libs.versions.toml Normal file
View File

@ -0,0 +1,23 @@
[versions]
kotlin = "2.0.20"
kotest = "5.9.1"
detekt = "1.23.7"
[libraries]
# Kotest
# See: https://kotest.io
kotest-core = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" }
kotest-assertions = { module = "io.kotest:kotest-assertions-core", version.ref = "kotest" }
kotest-data = { module = "io.kotest:kotest-framework-datatest", version.ref = "kotest" }
# Detekt
# See: https://detekt.dev
detekt-formatting = { module = "io.gitlab.arturbosch.detekt:detekt-formatting", version.ref = "detekt" }
[plugins]
# Kotlin
# See: https://plugins.gradle.org/plugin/org.jetbrains.kotlin.jvm
kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }
# Detekt
# See: https://detekt.dev
detekt = { id = "io.gitlab.arturbosch.detekt", version.ref = "detekt" }

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

252
gradlew vendored Executable file
View File

@ -0,0 +1,252 @@
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
warn () {
echo "$*"
} >&2
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

94
gradlew.bat vendored Normal file
View File

@ -0,0 +1,94 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

1
settings.gradle.kts Normal file
View File

@ -0,0 +1 @@
// Name of the root Gradle project.
rootProject.name = "pinyin"

View File

@ -0,0 +1,110 @@
package com.marvinelsen.chinese.transliteration
import java.io.InputStream
/**
 * A single syllable, stored as its Pinyin spelling without a tone plus a separate [Tone].
 *
 * The spelling is kept exactly as supplied (letter case and the `v` / `u:` / `ü` variants are preserved).
 * It is validated case-insensitively against the bundled Pinyin-to-Zhuyin table by the factory functions
 * in the companion object and again whenever the syllable is formatted.
 */
data class Syllable(
    val pinyinSyllableWithoutTone: String,
    val tone: Tone,
) {
    companion object {
        // Lowercase toneless Pinyin -> Zhuyin, loaded once from the bundled TSV resource.
        private val pinyinToZhuyin = parseTranscriptions(
            this::class.java.getResourceAsStream("/pinyin_zhuyin_transcriptions.tsv")!!
        )

        // Reverse lookup. Several Pinyin spellings can share one Zhuyin spelling (e.g. "lu:", "lv", "lü");
        // `associate` keeps the pair that comes last in iteration order for a duplicated key.
        private val zhuyinToPinyin = pinyinToZhuyin.entries.associate { it.value to it.key }

        private val zhuyinToneMarkRegex = """[ˊˇˋ˙]""".toRegex()

        /**
         * Parses a Pinyin syllable that ends in a tone number, e.g. `"sheng1"`.
         *
         * The spelling before the digit is looked up case-insensitively but stored as given.
         *
         * @throws IllegalArgumentException if the input is empty, does not end in a digit between 1 and 5,
         * or the part before the digit is not a known Pinyin syllable.
         */
        fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): Syllable {
            // Guard first: without it an empty string would fail with an index/no-such-element exception
            // instead of the IllegalArgumentException used for every other invalid input.
            require(pinyinWithToneNumber.isNotEmpty()) {
                "'' is not a valid Pinyin with tone number syllable. Expected a non-empty string."
            }

            val pinyinWithoutNumber = pinyinWithToneNumber.dropLast(1)
            val lastCharacter = pinyinWithToneNumber.last()

            require(lastCharacter.isDigit()) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the last character to be a digit, but was '${pinyinWithToneNumber.last()}'"
            }

            @Suppress("MagicNumber")
            require(lastCharacter.digitToInt() in 1..5) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the tone number 'n' to be in range 1 <= n <= 5, but was '${pinyinWithToneNumber.last()}'"
            }

            require(
                pinyinWithoutNumber.lowercase() in pinyinToZhuyin
            ) { "'$pinyinWithoutNumber' is not a valid Pinyin syllable." }

            return Syllable(
                pinyinSyllableWithoutTone = pinyinWithoutNumber,
                tone = Tone.fromDigit(lastCharacter)
            )
        }

        /**
         * Parses a Zhuyin syllable, e.g. `"ㄎㄚˇ"`.
         *
         * The tone is taken from the last character, then from the first character, and defaults to
         * [Tone.FIRST] when neither is a tone mark.
         *
         * @throws IllegalArgumentException if the input, with its tone marks removed, is not a known
         * Zhuyin syllable (this includes the empty string).
         */
        fun fromZhuyin(zhuyin: String): Syllable {
            val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")

            require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }

            return Syllable(
                zhuyinToPinyin[zhuyinWithoutToneMark]!!,
                Tone.fromZhuyinToneMarkOrNull(zhuyin.last()) ?: Tone.fromZhuyinToneMarkOrNull(zhuyin.first())
                    ?: Tone.FIRST
            )
        }

        // Reads tab-separated "pinyin<TAB>zhuyin" lines into a map. Blank lines are skipped so that a
        // stray empty line in the resource cannot break class initialisation.
        private fun parseTranscriptions(inputStream: InputStream) =
            inputStream.bufferedReader().useLines { lines ->
                lines.filter { it.isNotBlank() }
                    .map { it.split('\t') }
                    .associate { it[0] to it[1] }
            }
    }

    /**
     * Renders this syllable in the given [transliterationSystem].
     *
     * @throws IllegalStateException if [pinyinSyllableWithoutTone] is not a known Pinyin syllable, or if
     * tone marks are requested for a syllable that contains no vowel to carry the mark.
     */
    fun format(transliterationSystem: TransliterationSystem) = when (transliterationSystem) {
        TransliterationSystem.ZHUYIN -> formatToZhuyin()
        TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
        TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
    }

    private fun formatToZhuyin(): String {
        val zhuyinSyllable = pinyinToZhuyin[pinyinSyllableWithoutTone.lowercase()]
            ?: error("$pinyinSyllableWithoutTone is not a valid Pinyin syllable")
        val zhuyinToneMark = tone.format(TransliterationSystem.ZHUYIN)

        // The fifth-tone mark is written before the syllable; every other mark follows it.
        return when (tone) {
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FORTH -> zhuyinSyllable + zhuyinToneMark
            Tone.FIFTH -> zhuyinToneMark + zhuyinSyllable
        }
    }

    private fun formatToPinyinWithToneNumbers(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        return pinyinSyllableWithoutTone + tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
    }

    private fun formatToPinyinWithToneMarks(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        // Normalise the ASCII spellings of ü while keeping the caller's letter case.
        val sanitizedPinyinSyllableWithoutTone = pinyinSyllableWithoutTone
            .replace("v", "ü")
            .replace("V", "Ü")
            .replace("u:", "ü")
            .replace("U:", "Ü")

        val vowelIndex = toneMarkVowelIndex(sanitizedPinyinSyllableWithoutTone)

        // The tone mark is appended directly after the vowel that carries it.
        return buildString {
            append(sanitizedPinyinSyllableWithoutTone)
            insert(vowelIndex + 1, tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS))
        }
    }

    // Finds the index of the vowel that carries the tone mark: 'a', then 'o', then 'e' take precedence;
    // in "iu" the mark goes on the 'u'; otherwise on 'i', 'u' or 'ü'. Matching ignores letter case so
    // that capitalised syllables accepted by the validity check (e.g. "Ou") are handled as well.
    private fun toneMarkVowelIndex(syllable: String): Int {
        fun find(vowel: Char) = syllable.indexOf(vowel, ignoreCase = true)

        val strongVowelIndex = sequenceOf('a', 'o', 'e').map { find(it) }.firstOrNull { it >= 0 }
        if (strongVowelIndex != null) return strongVowelIndex

        val indexOfI = find('i')
        if (indexOfI >= 0) {
            val followedByU = syllable.elementAtOrNull(indexOfI + 1)?.equals('u', ignoreCase = true) == true
            return if (followedByU) indexOfI + 1 else indexOfI
        }

        val indexOfU = find('u')
        if (indexOfU >= 0) return indexOfU

        val indexOfUmlautU = find('ü')
        if (indexOfUmlautU >= 0) return indexOfUmlautU

        error("No vowel found in Pinyin syllable '$syllable'")
    }
}

View File

@ -0,0 +1,81 @@
package com.marvinelsen.chinese.transliteration
/**
 * The five tones a syllable can carry, with conversions from and to tone numbers, digits and
 * Zhuyin tone marks.
 *
 * Note: the fourth tone is spelled [FORTH] in this API.
 */
@Suppress("MagicNumber")
enum class Tone {
    FIRST, SECOND, THIRD, FORTH, FIFTH;

    companion object {
        /** Returns the tone numbered [number] (1..5) or throws [IllegalArgumentException]. */
        fun fromInt(number: Int) =
            fromIntOrNull(number) ?: throw IllegalArgumentException("Number $number is not a valid tone")

        /** Returns the tone numbered [number] (1..5), or `null` for any other value. */
        fun fromIntOrNull(number: Int) = Tone.entries.getOrNull(number - 1)

        /** Returns the tone for the ASCII digit [digit] ('1'..'5') or throws [IllegalArgumentException]. */
        fun fromDigit(digit: Char) =
            fromDigitOrNull(digit) ?: throw IllegalArgumentException("Digit $digit is not a valid tone")

        /** Returns the tone for the ASCII digit [digit] ('1'..'5'), or `null` for any other character. */
        fun fromDigitOrNull(digit: Char) = if (digit in '1'..'5') fromIntOrNull(digit - '0') else null

        /** Returns the tone for a Zhuyin tone mark or throws [IllegalArgumentException]. */
        fun fromZhuyinToneMark(zhuyinToneMark: Char) = fromZhuyinToneMarkOrNull(zhuyinToneMark)
            ?: throw IllegalArgumentException("Invalid zhuyin tone mark '$zhuyinToneMark'")

        /**
         * Returns the tone for a Zhuyin tone mark, or `null` if [zhuyinToneMark] is not one.
         * There is no mark for [FIRST], so it is never returned here.
         */
        fun fromZhuyinToneMarkOrNull(zhuyinToneMark: Char): Tone? {
            return when (zhuyinToneMark) {
                'ˊ' -> SECOND
                'ˇ' -> THIRD
                'ˋ' -> FORTH
                '˙' -> FIFTH
                else -> null
            }
        }
    }

    /** The tone number, 1 for [FIRST] through 5 for [FIFTH]. */
    fun toInt() = ordinal + 1

    /** Renders only the tone part (digit, combining diacritic or Zhuyin mark) for the given system. */
    fun format(transliterationSystem: TransliterationSystem): String {
        return when (transliterationSystem) {
            TransliterationSystem.ZHUYIN -> formatToZhuyin()
            TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
            TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
        }
    }

    // The tone number as a one-character string.
    private fun formatToPinyinWithToneNumbers() = toInt().toString()

    // Unicode combining diacritics; the fifth tone is rendered without a mark.
    private fun formatToPinyinWithToneMarks(): String {
        return when (this) {
            FIRST -> "\u0304"
            SECOND -> "\u0301"
            THIRD -> "\u030C"
            FORTH -> "\u0300"
            FIFTH -> ""
        }
    }

    // Zhuyin tone marks; the first tone is rendered without a mark.
    private fun formatToZhuyin(): String {
        return when (this) {
            FIRST -> ""
            SECOND -> "ˊ"
            THIRD -> "ˇ"
            FORTH -> "ˋ"
            FIFTH -> "˙"
        }
    }
}

View File

@ -0,0 +1,5 @@
package com.marvinelsen.chinese.transliteration
/** The output notations a syllable or tone can be formatted in. */
enum class TransliterationSystem {
    /** Zhuyin symbols with Zhuyin tone marks. */
    ZHUYIN,

    /** Pinyin followed by a tone digit. */
    PINYIN_WITH_TONE_NUMBERS,

    /** Pinyin with the tone written as a diacritic. */
    PINYIN_WITH_TONE_MARKS,
}

View File

@ -0,0 +1,5 @@
package com.marvinelsen.chinese.transliteration
/** Constants related to Zhuyin text. */
object Zhuyin {
    // NOTE(review): presumably the delimiter placed between Zhuyin syllables; no usage is visible here — confirm.
    const val SEPARATOR = " "
}

View File

@ -0,0 +1,434 @@
a ㄚ
ai ㄞ
an ㄢ
ang ㄤ
ao ㄠ
ba ㄅㄚ
bai ㄅㄞ
ban ㄅㄢ
bang ㄅㄤ
bao ㄅㄠ
bei ㄅㄟ
ben ㄅㄣ
beng ㄅㄥ
bi ㄅㄧ
biu ㄅㄧㄡ
bia ㄅㄧㄚ
bian ㄅㄧㄢ
biang ㄅㄧㄤ
biao ㄅㄧㄠ
bie ㄅㄧㄝ
bin ㄅㄧㄣ
bing ㄅㄧㄥ
bo ㄅㄛ
bu ㄅㄨ
ca ㄘㄚ
cai ㄘㄞ
can ㄘㄢ
cang ㄘㄤ
cao ㄘㄠ
ce ㄘㄜ
cen ㄘㄣ
ceng ㄘㄥ
cha ㄔㄚ
chai ㄔㄞ
chan ㄔㄢ
chang ㄔㄤ
chao ㄔㄠ
che ㄔㄜ
chen ㄔㄣ
cheng ㄔㄥ
chi ㄔ
chong ㄔㄨㄥ
chou ㄔㄡ
chu ㄔㄨ
chua ㄔㄨㄚ
chuai ㄔㄨㄞ
chuan ㄔㄨㄢ
chuang ㄔㄨㄤ
chui ㄔㄨㄟ
chun ㄔㄨㄣ
chuo ㄔㄨㄛ
ci ㄘ
cong ㄘㄨㄥ
cou ㄘㄡ
cu ㄘㄨ
cuan ㄘㄨㄢ
cui ㄘㄨㄟ
cun ㄘㄨㄣ
cuo ㄘㄨㄛ
da ㄉㄚ
dai ㄉㄞ
dan ㄉㄢ
dang ㄉㄤ
dao ㄉㄠ
de ㄉㄜ
dei ㄉㄟ
den ㄉㄣ
deng ㄉㄥ
di ㄉㄧ
dia ㄉㄧㄚ
dian ㄉㄧㄢ
diang ㄉㄧㄤ
diao ㄉㄧㄠ
die ㄉㄧㄝ
ding ㄉㄧㄥ
diu ㄉㄧㄡ
dong ㄉㄨㄥ
dou ㄉㄡ
du ㄉㄨ
duan ㄉㄨㄢ
dui ㄉㄨㄟ
dun ㄉㄨㄣ
duo ㄉㄨㄛ
e ㄜ
ei ㄟ
en ㄣ
eng ㄥ
r ㄦ
er ㄦ
fa ㄈㄚ
fan ㄈㄢ
fang ㄈㄤ
fei ㄈㄟ
fen ㄈㄣ
feng ㄈㄥ
fo ㄈㄛ
fou ㄈㄡ
fu ㄈㄨ
fiao ㄈㄧㄠ
ga ㄍㄚ
gai ㄍㄞ
gan ㄍㄢ
gang ㄍㄤ
gao ㄍㄠ
ge ㄍㄜ
gei ㄍㄟ
gen ㄍㄣ
geng ㄍㄥ
gong ㄍㄨㄥ
ging ㄍㄧㄥ
gou ㄍㄡ
gu ㄍㄨ
gua ㄍㄨㄚ
guai ㄍㄨㄞ
guan ㄍㄨㄢ
guang ㄍㄨㄤ
gui ㄍㄨㄟ
gun ㄍㄨㄣ
guo ㄍㄨㄛ
ha ㄏㄚ
hai ㄏㄞ
han ㄏㄢ
hang ㄏㄤ
hao ㄏㄠ
he ㄏㄜ
hei ㄏㄟ
hen ㄏㄣ
heng ㄏㄥ
hong ㄏㄨㄥ
hou ㄏㄡ
hu ㄏㄨ
hua ㄏㄨㄚ
huai ㄏㄨㄞ
huan ㄏㄨㄢ
huang ㄏㄨㄤ
hui ㄏㄨㄟ
hun ㄏㄨㄣ
huo ㄏㄨㄛ
hue ㄏㄨㄜ
ji ㄐㄧ
jia ㄐㄧㄚ
jian ㄐㄧㄢ
jiang ㄐㄧㄤ
jiao ㄐㄧㄠ
jie ㄐㄧㄝ
jin ㄐㄧㄣ
jing ㄐㄧㄥ
jiong ㄐㄩㄥ
jiu ㄐㄧㄡ
ju ㄐㄩ
juan ㄐㄩㄢ
jue ㄐㄩㄝ
jun ㄐㄩㄣ
ka ㄎㄚ
kai ㄎㄞ
kan ㄎㄢ
kang ㄎㄤ
kao ㄎㄠ
ke ㄎㄜ
kei ㄎㄟ
ken ㄎㄣ
keng ㄎㄥ
kong ㄎㄨㄥ
kou ㄎㄡ
ku ㄎㄨ
kua ㄎㄨㄚ
kuai ㄎㄨㄞ
kuan ㄎㄨㄢ
kuang ㄎㄨㄤ
kui ㄎㄨㄟ
kun ㄎㄨㄣ
kuo ㄎㄨㄛ
la ㄌㄚ
lai ㄌㄞ
lan ㄌㄢ
lang ㄌㄤ
lao ㄌㄠ
le ㄌㄜ
lei ㄌㄟ
leng ㄌㄥ
li ㄌㄧ
lia ㄌㄧㄚ
lian ㄌㄧㄢ
liang ㄌㄧㄤ
liao ㄌㄧㄠ
lie ㄌㄧㄝ
lin ㄌㄧㄣ
ling ㄌㄧㄥ
liu ㄌㄧㄡ
lo ㄌㄛ
long ㄌㄨㄥ
lou ㄌㄡ
lu ㄌㄨ
luan ㄌㄨㄢ
lun ㄌㄨㄣ
luo ㄌㄨㄛ
lu: ㄌㄩ
lv ㄌㄩ
lü ㄌㄩ
lu:e ㄌㄩㄝ
lve ㄌㄩㄝ
lüe ㄌㄩㄝ
lu:n ㄌㄩㄣ
lvn ㄌㄩㄣ
lün ㄌㄩㄣ
m ㄇ
ma ㄇㄚ
mai ㄇㄞ
man ㄇㄢ
mang ㄇㄤ
mao ㄇㄠ
me ㄇㄜ
mei ㄇㄟ
men ㄇㄣ
meng ㄇㄥ
mi ㄇㄧ
mian ㄇㄧㄢ
miao ㄇㄧㄠ
mie ㄇㄧㄝ
min ㄇㄧㄣ
ming ㄇㄧㄥ
miu ㄇㄧㄡ
mo ㄇㄛ
mou ㄇㄡ
mu ㄇㄨ
na ㄋㄚ
nai ㄋㄞ
nan ㄋㄢ
nang ㄋㄤ
nao ㄋㄠ
ne ㄋㄜ
nei ㄋㄟ
nen ㄋㄣ
neng ㄋㄥ
ni ㄋㄧ
nia ㄋㄧㄚ
nian ㄋㄧㄢ
niang ㄋㄧㄤ
niao ㄋㄧㄠ
nie ㄋㄧㄝ
nin ㄋㄧㄣ
ning ㄋㄧㄥ
niu ㄋㄧㄡ
nong ㄋㄨㄥ
nou ㄋㄡ
nu ㄋㄨ
nuan ㄋㄨㄢ
nun ㄋㄨㄣ
nuo ㄋㄨㄛ
nu: ㄋㄩ
nv ㄋㄩ
nü ㄋㄩ
nu:e ㄋㄩㄝ
nve ㄋㄩㄝ
nüe ㄋㄩㄝ
o ㄛ
ou ㄡ
pa ㄆㄚ
pai ㄆㄞ
pan ㄆㄢ
pang ㄆㄤ
pao ㄆㄠ
pei ㄆㄟ
pen ㄆㄣ
peng ㄆㄥ
pi ㄆㄧ
pian ㄆㄧㄢ
piao ㄆㄧㄠ
pie ㄆㄧㄝ
pin ㄆㄧㄣ
ping ㄆㄧㄥ
po ㄆㄛ
pou ㄆㄡ
pu ㄆㄨ
qi ㄑㄧ
qia ㄑㄧㄚ
qian ㄑㄧㄢ
qiang ㄑㄧㄤ
qiao ㄑㄧㄠ
qie ㄑㄧㄝ
qin ㄑㄧㄣ
qing ㄑㄧㄥ
qiong ㄑㄩㄥ
qiu ㄑㄧㄡ
qu ㄑㄩ
quan ㄑㄩㄢ
que ㄑㄩㄝ
qun ㄑㄩㄣ
ran ㄖㄢ
rang ㄖㄤ
rao ㄖㄠ
re ㄖㄜ
ren ㄖㄣ
reng ㄖㄥ
ri ㄖ
rong ㄖㄨㄥ
rou ㄖㄡ
ru ㄖㄨ
ruan ㄖㄨㄢ
rui ㄖㄨㄟ
run ㄖㄨㄣ
ruo ㄖㄨㄛ
sa ㄙㄚ
sai ㄙㄞ
san ㄙㄢ
sang ㄙㄤ
sao ㄙㄠ
se ㄙㄜ
sei ㄙㄟ
sen ㄙㄣ
seng ㄙㄥ
sha ㄕㄚ
shai ㄕㄞ
shan ㄕㄢ
shang ㄕㄤ
shao ㄕㄠ
she ㄕㄜ
shei ㄕㄟ
shen ㄕㄣ
sheng ㄕㄥ
shi ㄕ
shong ㄕㄨㄥ
shou ㄕㄡ
shu ㄕㄨ
shua ㄕㄨㄚ
shuai ㄕㄨㄞ
shuan ㄕㄨㄢ
shuang ㄕㄨㄤ
shui ㄕㄨㄟ
shun ㄕㄨㄣ
shuo ㄕㄨㄛ
si ㄙ
song ㄙㄨㄥ
sou ㄙㄡ
su ㄙㄨ
suan ㄙㄨㄢ
sui ㄙㄨㄟ
sun ㄙㄨㄣ
suo ㄙㄨㄛ
ta ㄊㄚ
tai ㄊㄞ
tan ㄊㄢ
tang ㄊㄤ
tao ㄊㄠ
te ㄊㄜ
tei ㄊㄟ
teng ㄊㄥ
ti ㄊㄧ
tian ㄊㄧㄢ
tiao ㄊㄧㄠ
tie ㄊㄧㄝ
ting ㄊㄧㄥ
tong ㄊㄨㄥ
tou ㄊㄡ
tu ㄊㄨ
tuan ㄊㄨㄢ
tui ㄊㄨㄟ
tun ㄊㄨㄣ
tuo ㄊㄨㄛ
wa ㄨㄚ
wai ㄨㄞ
wan ㄨㄢ
wang ㄨㄤ
wei ㄨㄟ
wen ㄨㄣ
weng ㄨㄥ
wo ㄨㄛ
wu ㄨ
xi ㄒㄧ
xia ㄒㄧㄚ
xian ㄒㄧㄢ
xiang ㄒㄧㄤ
xiao ㄒㄧㄠ
xie ㄒㄧㄝ
xin ㄒㄧㄣ
xing ㄒㄧㄥ
xiong ㄒㄩㄥ
xiu ㄒㄧㄡ
xu ㄒㄩ
xuan ㄒㄩㄢ
xue ㄒㄩㄝ
xun ㄒㄩㄣ
ya ㄧㄚ
yan ㄧㄢ
yang ㄧㄤ
yao ㄧㄠ
ye ㄧㄝ
yi ㄧ
yin ㄧㄣ
ying ㄧㄥ
yo ㄧㄛ
yong ㄩㄥ
you ㄧㄡ
yu ㄩ
yuan ㄩㄢ
yue ㄩㄝ
yun ㄩㄣ
za ㄗㄚ
zai ㄗㄞ
zan ㄗㄢ
zang ㄗㄤ
zao ㄗㄠ
ze ㄗㄜ
zei ㄗㄟ
zen ㄗㄣ
zeng ㄗㄥ
zha ㄓㄚ
zhai ㄓㄞ
zhan ㄓㄢ
zhang ㄓㄤ
zhao ㄓㄠ
zhe ㄓㄜ
zhei ㄓㄟ
zhen ㄓㄣ
zheng ㄓㄥ
zhi ㄓ
zhong ㄓㄨㄥ
zhou ㄓㄡ
zhu ㄓㄨ
zhua ㄓㄨㄚ
zhuai ㄓㄨㄞ
zhuan ㄓㄨㄢ
zhuang ㄓㄨㄤ
zhui ㄓㄨㄟ
zhun ㄓㄨㄣ
zhuo ㄓㄨㄛ
zi ㄗ
zong ㄗㄨㄥ
zou ㄗㄡ
zu ㄗㄨ
zuan ㄗㄨㄢ
zui ㄗㄨㄟ
zun ㄗㄨㄣ
zuo ㄗㄨㄛ
1 a
2 ai
3 an
4 ang
5 ao
6 ba ㄅㄚ
7 bai ㄅㄞ
8 ban ㄅㄢ
9 bang ㄅㄤ
10 bao ㄅㄠ
11 bei ㄅㄟ
12 ben ㄅㄣ
13 beng ㄅㄥ
14 bi ㄅㄧ
15 biu ㄅㄧㄡ
16 bia ㄅㄧㄚ
17 bian ㄅㄧㄢ
18 biang ㄅㄧㄤ
19 biao ㄅㄧㄠ
20 bie ㄅㄧㄝ
21 bin ㄅㄧㄣ
22 bing ㄅㄧㄥ
23 bo ㄅㄛ
24 bu ㄅㄨ
25 ca ㄘㄚ
26 cai ㄘㄞ
27 can ㄘㄢ
28 cang ㄘㄤ
29 cao ㄘㄠ
30 ce ㄘㄜ
31 cen ㄘㄣ
32 ceng ㄘㄥ
33 cha ㄔㄚ
34 chai ㄔㄞ
35 chan ㄔㄢ
36 chang ㄔㄤ
37 chao ㄔㄠ
38 che ㄔㄜ
39 chen ㄔㄣ
40 cheng ㄔㄥ
41 chi
42 chong ㄔㄨㄥ
43 chou ㄔㄡ
44 chu ㄔㄨ
45 chua ㄔㄨㄚ
46 chuai ㄔㄨㄞ
47 chuan ㄔㄨㄢ
48 chuang ㄔㄨㄤ
49 chui ㄔㄨㄟ
50 chun ㄔㄨㄣ
51 chuo ㄔㄨㄛ
52 ci
53 cong ㄘㄨㄥ
54 cou ㄘㄡ
55 cu ㄘㄨ
56 cuan ㄘㄨㄢ
57 cui ㄘㄨㄟ
58 cun ㄘㄨㄣ
59 cuo ㄘㄨㄛ
60 da ㄉㄚ
61 dai ㄉㄞ
62 dan ㄉㄢ
63 dang ㄉㄤ
64 dao ㄉㄠ
65 de ㄉㄜ
66 dei ㄉㄟ
67 den ㄉㄣ
68 deng ㄉㄥ
69 di ㄉㄧ
70 dia ㄉㄧㄚ
71 dian ㄉㄧㄢ
72 diang ㄉㄧㄤ
73 diao ㄉㄧㄠ
74 die ㄉㄧㄝ
75 ding ㄉㄧㄥ
76 diu ㄉㄧㄡ
77 dong ㄉㄨㄥ
78 dou ㄉㄡ
79 du ㄉㄨ
80 duan ㄉㄨㄢ
81 dui ㄉㄨㄟ
82 dun ㄉㄨㄣ
83 duo ㄉㄨㄛ
84 e
85 ei
86 en
87 eng
88 r
89 er
90 fa ㄈㄚ
91 fan ㄈㄢ
92 fang ㄈㄤ
93 fei ㄈㄟ
94 fen ㄈㄣ
95 feng ㄈㄥ
96 fo ㄈㄛ
97 fou ㄈㄡ
98 fu ㄈㄨ
99 fiao ㄈㄧㄠ
100 ga ㄍㄚ
101 gai ㄍㄞ
102 gan ㄍㄢ
103 gang ㄍㄤ
104 gao ㄍㄠ
105 ge ㄍㄜ
106 gei ㄍㄟ
107 gen ㄍㄣ
108 geng ㄍㄥ
109 gong ㄍㄨㄥ
110 ging ㄍㄧㄥ
111 gou ㄍㄡ
112 gu ㄍㄨ
113 gua ㄍㄨㄚ
114 guai ㄍㄨㄞ
115 guan ㄍㄨㄢ
116 guang ㄍㄨㄤ
117 gui ㄍㄨㄟ
118 gun ㄍㄨㄣ
119 guo ㄍㄨㄛ
120 ha ㄏㄚ
121 hai ㄏㄞ
122 han ㄏㄢ
123 hang ㄏㄤ
124 hao ㄏㄠ
125 he ㄏㄜ
126 hei ㄏㄟ
127 hen ㄏㄣ
128 heng ㄏㄥ
129 hong ㄏㄨㄥ
130 hou ㄏㄡ
131 hu ㄏㄨ
132 hua ㄏㄨㄚ
133 huai ㄏㄨㄞ
134 huan ㄏㄨㄢ
135 huang ㄏㄨㄤ
136 hui ㄏㄨㄟ
137 hun ㄏㄨㄣ
138 huo ㄏㄨㄛ
139 hue ㄏㄨㄜ
140 ji ㄐㄧ
141 jia ㄐㄧㄚ
142 jian ㄐㄧㄢ
143 jiang ㄐㄧㄤ
144 jiao ㄐㄧㄠ
145 jie ㄐㄧㄝ
146 jin ㄐㄧㄣ
147 jing ㄐㄧㄥ
148 jiong ㄐㄩㄥ
149 jiu ㄐㄧㄡ
150 ju ㄐㄩ
151 juan ㄐㄩㄢ
152 jue ㄐㄩㄝ
153 jun ㄐㄩㄣ
154 ka ㄎㄚ
155 kai ㄎㄞ
156 kan ㄎㄢ
157 kang ㄎㄤ
158 kao ㄎㄠ
159 ke ㄎㄜ
160 kei ㄎㄟ
161 ken ㄎㄣ
162 keng ㄎㄥ
163 kong ㄎㄨㄥ
164 kou ㄎㄡ
165 ku ㄎㄨ
166 kua ㄎㄨㄚ
167 kuai ㄎㄨㄞ
168 kuan ㄎㄨㄢ
169 kuang ㄎㄨㄤ
170 kui ㄎㄨㄟ
171 kun ㄎㄨㄣ
172 kuo ㄎㄨㄛ
173 la ㄌㄚ
174 lai ㄌㄞ
175 lan ㄌㄢ
176 lang ㄌㄤ
177 lao ㄌㄠ
178 le ㄌㄜ
179 lei ㄌㄟ
180 leng ㄌㄥ
181 li ㄌㄧ
182 lia ㄌㄧㄚ
183 lian ㄌㄧㄢ
184 liang ㄌㄧㄤ
185 liao ㄌㄧㄠ
186 lie ㄌㄧㄝ
187 lin ㄌㄧㄣ
188 ling ㄌㄧㄥ
189 liu ㄌㄧㄡ
190 lo ㄌㄛ
191 long ㄌㄨㄥ
192 lou ㄌㄡ
193 lu ㄌㄨ
194 luan ㄌㄨㄢ
195 lun ㄌㄨㄣ
196 luo ㄌㄨㄛ
197 lu: ㄌㄩ
198 lv ㄌㄩ
199 ㄌㄩ
200 lu:e ㄌㄩㄝ
201 lve ㄌㄩㄝ
202 lüe ㄌㄩㄝ
203 lu:n ㄌㄩㄣ
204 lvn ㄌㄩㄣ
205 lün ㄌㄩㄣ
206 m
207 ma ㄇㄚ
208 mai ㄇㄞ
209 man ㄇㄢ
210 mang ㄇㄤ
211 mao ㄇㄠ
212 me ㄇㄜ
213 mei ㄇㄟ
214 men ㄇㄣ
215 meng ㄇㄥ
216 mi ㄇㄧ
217 mian ㄇㄧㄢ
218 miao ㄇㄧㄠ
219 mie ㄇㄧㄝ
220 min ㄇㄧㄣ
221 ming ㄇㄧㄥ
222 miu ㄇㄧㄡ
223 mo ㄇㄛ
224 mou ㄇㄡ
225 mu ㄇㄨ
226 na ㄋㄚ
227 nai ㄋㄞ
228 nan ㄋㄢ
229 nang ㄋㄤ
230 nao ㄋㄠ
231 ne ㄋㄜ
232 nei ㄋㄟ
233 nen ㄋㄣ
234 neng ㄋㄥ
235 ni ㄋㄧ
236 nia ㄋㄧㄚ
237 nian ㄋㄧㄢ
238 niang ㄋㄧㄤ
239 niao ㄋㄧㄠ
240 nie ㄋㄧㄝ
241 nin ㄋㄧㄣ
242 ning ㄋㄧㄥ
243 niu ㄋㄧㄡ
244 nong ㄋㄨㄥ
245 nou ㄋㄡ
246 nu ㄋㄨ
247 nuan ㄋㄨㄢ
248 nun ㄋㄨㄣ
249 nuo ㄋㄨㄛ
250 nu: ㄋㄩ
251 nv ㄋㄩ
252 ㄋㄩ
253 nu:e ㄋㄩㄝ
254 nve ㄋㄩㄝ
255 nüe ㄋㄩㄝ
256 o
257 ou
258 pa ㄆㄚ
259 pai ㄆㄞ
260 pan ㄆㄢ
261 pang ㄆㄤ
262 pao ㄆㄠ
263 pei ㄆㄟ
264 pen ㄆㄣ
265 peng ㄆㄥ
266 pi ㄆㄧ
267 pian ㄆㄧㄢ
268 piao ㄆㄧㄠ
269 pie ㄆㄧㄝ
270 pin ㄆㄧㄣ
271 ping ㄆㄧㄥ
272 po ㄆㄛ
273 pou ㄆㄡ
274 pu ㄆㄨ
275 qi ㄑㄧ
276 qia ㄑㄧㄚ
277 qian ㄑㄧㄢ
278 qiang ㄑㄧㄤ
279 qiao ㄑㄧㄠ
280 qie ㄑㄧㄝ
281 qin ㄑㄧㄣ
282 qing ㄑㄧㄥ
283 qiong ㄑㄩㄥ
284 qiu ㄑㄧㄡ
285 qu ㄑㄩ
286 quan ㄑㄩㄢ
287 que ㄑㄩㄝ
288 qun ㄑㄩㄣ
289 ran ㄖㄢ
290 rang ㄖㄤ
291 rao ㄖㄠ
292 re ㄖㄜ
293 ren ㄖㄣ
294 reng ㄖㄥ
295 ri
296 rong ㄖㄨㄥ
297 rou ㄖㄡ
298 ru ㄖㄨ
299 ruan ㄖㄨㄢ
300 rui ㄖㄨㄟ
301 run ㄖㄨㄣ
302 ruo ㄖㄨㄛ
303 sa ㄙㄚ
304 sai ㄙㄞ
305 san ㄙㄢ
306 sang ㄙㄤ
307 sao ㄙㄠ
308 se ㄙㄜ
309 sei ㄙㄟ
310 sen ㄙㄣ
311 seng ㄙㄥ
312 sha ㄕㄚ
313 shai ㄕㄞ
314 shan ㄕㄢ
315 shang ㄕㄤ
316 shao ㄕㄠ
317 she ㄕㄜ
318 shei ㄕㄟ
319 shen ㄕㄣ
320 sheng ㄕㄥ
321 shi
322 shong ㄕㄨㄥ
323 shou ㄕㄡ
324 shu ㄕㄨ
325 shua ㄕㄨㄚ
326 shuai ㄕㄨㄞ
327 shuan ㄕㄨㄢ
328 shuang ㄕㄨㄤ
329 shui ㄕㄨㄟ
330 shun ㄕㄨㄣ
331 shuo ㄕㄨㄛ
332 si
333 song ㄙㄨㄥ
334 sou ㄙㄡ
335 su ㄙㄨ
336 suan ㄙㄨㄢ
337 sui ㄙㄨㄟ
338 sun ㄙㄨㄣ
339 suo ㄙㄨㄛ
340 ta ㄊㄚ
341 tai ㄊㄞ
342 tan ㄊㄢ
343 tang ㄊㄤ
344 tao ㄊㄠ
345 te ㄊㄜ
346 tei ㄊㄟ
347 teng ㄊㄥ
348 ti ㄊㄧ
349 tian ㄊㄧㄢ
350 tiao ㄊㄧㄠ
351 tie ㄊㄧㄝ
352 ting ㄊㄧㄥ
353 tong ㄊㄨㄥ
354 tou ㄊㄡ
355 tu ㄊㄨ
356 tuan ㄊㄨㄢ
357 tui ㄊㄨㄟ
358 tun ㄊㄨㄣ
359 tuo ㄊㄨㄛ
360 wa ㄨㄚ
361 wai ㄨㄞ
362 wan ㄨㄢ
363 wang ㄨㄤ
364 wei ㄨㄟ
365 wen ㄨㄣ
366 weng ㄨㄥ
367 wo ㄨㄛ
368 wu
369 xi ㄒㄧ
370 xia ㄒㄧㄚ
371 xian ㄒㄧㄢ
372 xiang ㄒㄧㄤ
373 xiao ㄒㄧㄠ
374 xie ㄒㄧㄝ
375 xin ㄒㄧㄣ
376 xing ㄒㄧㄥ
377 xiong ㄒㄩㄥ
378 xiu ㄒㄧㄡ
379 xu ㄒㄩ
380 xuan ㄒㄩㄢ
381 xue ㄒㄩㄝ
382 xun ㄒㄩㄣ
383 ya ㄧㄚ
384 yan ㄧㄢ
385 yang ㄧㄤ
386 yao ㄧㄠ
387 ye ㄧㄝ
388 yi
389 yin ㄧㄣ
390 ying ㄧㄥ
391 yo ㄧㄛ
392 yong ㄩㄥ
393 you ㄧㄡ
394 yu
395 yuan ㄩㄢ
396 yue ㄩㄝ
397 yun ㄩㄣ
398 za ㄗㄚ
399 zai ㄗㄞ
400 zan ㄗㄢ
401 zang ㄗㄤ
402 zao ㄗㄠ
403 ze ㄗㄜ
404 zei ㄗㄟ
405 zen ㄗㄣ
406 zeng ㄗㄥ
407 zha ㄓㄚ
408 zhai ㄓㄞ
409 zhan ㄓㄢ
410 zhang ㄓㄤ
411 zhao ㄓㄠ
412 zhe ㄓㄜ
413 zhei ㄓㄟ
414 zhen ㄓㄣ
415 zheng ㄓㄥ
416 zhi
417 zhong ㄓㄨㄥ
418 zhou ㄓㄡ
419 zhu ㄓㄨ
420 zhua ㄓㄨㄚ
421 zhuai ㄓㄨㄞ
422 zhuan ㄓㄨㄢ
423 zhuang ㄓㄨㄤ
424 zhui ㄓㄨㄟ
425 zhun ㄓㄨㄣ
426 zhuo ㄓㄨㄛ
427 zi
428 zong ㄗㄨㄥ
429 zou ㄗㄡ
430 zu ㄗㄨ
431 zuan ㄗㄨㄢ
432 zui ㄗㄨㄟ
433 zun ㄗㄨㄣ
434 zuo ㄗㄨㄛ

View File

@ -0,0 +1,104 @@
package com.marvinelsen.chinese.transliteration
import io.kotest.core.spec.style.ShouldSpec
import io.kotest.datatest.withData
import io.kotest.matchers.shouldBe
/**
 * Data-driven tests for [Syllable].
 *
 * Covers parsing via [Syllable.fromPinyinWithToneNumber] and [Syllable.fromZhuyin], and
 * formatting via `format` for each [TransliterationSystem] exercised below.
 *
 * The three spellings of the "ü" syllable ("nü", "nu:", "nv") are listed explicitly: the
 * expectations below keep the spelling as written for tone-number Pinyin, while Zhuyin and
 * tone-mark Pinyin output is expected to be the same for all three.
 */
class SyllableTest : ShouldSpec({
    context("from pinyin with tone numbers") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "sheng1" to Syllable("sheng", Tone.FIRST),
            "zhi2" to Syllable("zhi", Tone.SECOND),
            "ka3" to Syllable("ka", Tone.THIRD),
            "yao4" to Syllable("yao", Tone.FORTH),
            "me5" to Syllable("me", Tone.FIFTH),
            // Capitalisation of the input is expected to be carried over unchanged.
            "Me5" to Syllable("Me", Tone.FIFTH),
            "nv3" to Syllable("nv", Tone.THIRD),
            "nü3" to Syllable("nü", Tone.THIRD),
            "nu:3" to Syllable("nu:", Tone.THIRD),
        ) { (pinyinWithNumber, expectedSyllable) ->
            Syllable.fromPinyinWithToneNumber(pinyinWithNumber) shouldBe expectedSyllable
        }
    }

    context("from zhuyin") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            // No tone mark is expected to parse as the first tone.
            "ㄕㄥ" to Syllable("sheng", Tone.FIRST),
            "ㄓˊ" to Syllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to Syllable("yao", Tone.FORTH),
            // The neutral-tone dot is accepted both after and before the syllable.
            "ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
        ) { (zhuyin, expectedSyllable) ->
            Syllable.fromZhuyin(zhuyin) shouldBe expectedSyllable
        }
    }

    context("from invalid pinyin with tone numbers") {
        // TODO: no cases yet - add invalid-input expectations for Syllable.fromPinyinWithToneNumber.
    }

    context("from invalid zhuyin") {
        // TODO: no cases yet - add invalid-input expectations for Syllable.fromZhuyin.
    }

    context("format to zhuyin") {
        withData(
            nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
            Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
            Syllable("zhi", Tone.SECOND) to "ㄓˊ",
            Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            Syllable("yao", Tone.FORTH) to "ㄧㄠˋ",
            // When formatting, the neutral-tone dot is expected in front of the syllable.
            Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("nv", Tone.THIRD) to "ㄋㄩˇ",
        ) { (syllable, expectedZhuyin) ->
            syllable.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
        }
    }

    context("format to pinyin with tone numbers") {
        withData(
            nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
            Syllable("sheng", Tone.FIRST) to "sheng1",
            Syllable("zhi", Tone.SECOND) to "zhi2",
            Syllable("ka", Tone.THIRD) to "ka3",
            Syllable("yao", Tone.FORTH) to "yao4",
            Syllable("me", Tone.FIFTH) to "me5",
            Syllable("nü", Tone.THIRD) to "nü3",
            Syllable("nu:", Tone.THIRD) to "nu:3",
            Syllable("nv", Tone.THIRD) to "nv3",
        ) { (syllable, expectedPinyinWithToneNumbers) ->
            syllable.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedPinyinWithToneNumbers
        }
    }

    context("format to pinyin with tone marks") {
        withData(
            nameFn = { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" },
            Syllable("sheng", Tone.FIRST) to "shēng",
            Syllable("zhi", Tone.SECOND) to "zhí",
            Syllable("ka", Tone.THIRD) to "kǎ",
            Syllable("yao", Tone.FORTH) to "yào",
            // The fifth (neutral) tone carries no mark.
            Syllable("me", Tone.FIFTH) to "me",
            // "ui" and "iu" finals: the mark is expected on the second vowel.
            Syllable("zhui", Tone.FIRST) to "zhuī",
            Syllable("liu", Tone.FIRST) to "liū",
            Syllable("nü", Tone.THIRD) to "nǚ",
            Syllable("nu:", Tone.THIRD) to "nǚ",
            Syllable("nv", Tone.THIRD) to "nǚ",
        ) { (syllable, expectedPinyinWithToneMarks) ->
            syllable.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedPinyinWithToneMarks
        }
    }

    context("format to zhuyin with invalid pinyin syllable") {
        // TODO: no cases yet.
    }

    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
        // TODO: no cases yet.
    }

    context("format to pinyin with tone numbers with invalid pinyin syllable") {
        // TODO: no cases yet.
    }
})

View File

@ -0,0 +1,187 @@
package com.marvinelsen.chinese.transliteration
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.core.spec.style.ShouldSpec
import io.kotest.datatest.withData
import io.kotest.matchers.nulls.shouldBeNull
import io.kotest.matchers.shouldBe
/**
 * Data-driven tests for [Tone].
 *
 * Each conversion is checked in three flavours where available: the happy path, the
 * `...OrNull` variant (expected to return null on invalid input) and the throwing variant
 * (expected to raise [IllegalArgumentException] on invalid input). Formatting is checked for
 * every [TransliterationSystem] exercised below.
 */
class ToneTest : ShouldSpec({
    context("convert correctly from digit") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            '1' to Tone.FIRST,
            '2' to Tone.SECOND,
            '3' to Tone.THIRD,
            '4' to Tone.FORTH,
            '5' to Tone.FIFTH,
        ) { (digit, expectedTone) ->
            Tone.fromDigit(digit) shouldBe expectedTone
        }
    }

    context("convert correctly from Int") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            1 to Tone.FIRST,
            2 to Tone.SECOND,
            3 to Tone.THIRD,
            4 to Tone.FORTH,
            5 to Tone.FIFTH,
        ) { (number, expectedTone) ->
            Tone.fromInt(number) shouldBe expectedTone
        }
    }

    context("convert correctly from Zhuyin tone mark") {
        // Only the four explicit marks are covered here; no case is listed for the first tone.
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            'ˊ' to Tone.SECOND,
            'ˇ' to Tone.THIRD,
            'ˋ' to Tone.FORTH,
            '˙' to Tone.FIFTH,
        ) { (zhuyinToneMark, expectedTone) ->
            Tone.fromZhuyinToneMark(zhuyinToneMark) shouldBe expectedTone
        }
    }

    context("convert correctly to Int") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Tone.FIRST to 1,
            Tone.SECOND to 2,
            Tone.THIRD to 3,
            Tone.FORTH to 4,
            Tone.FIFTH to 5,
        ) { (tone, expectedInteger) ->
            tone.toInt() shouldBe expectedInteger
        }
    }

    context("return null when converting from invalid digit ") {
        withData(
            nameFn = { "'$it' -> null" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidDigit ->
            Tone.fromDigitOrNull(invalidDigit).shouldBeNull()
        }
    }

    context("return null when converting from invalid int ") {
        withData(
            // Label matches the assertion: the OrNull variant returns null, it does not throw.
            nameFn = { "'$it' -> null" },
            0,
            6,
            -1,
            Int.MAX_VALUE,
            Int.MIN_VALUE,
        ) { invalidNumber ->
            Tone.fromIntOrNull(invalidNumber).shouldBeNull()
        }
    }

    context("return null when converting from invalid Zhuyin tone mark ") {
        withData(
            // Label matches the assertion: the OrNull variant returns null, it does not throw.
            nameFn = { "'$it' -> null" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidZhuyinToneMark ->
            Tone.fromZhuyinToneMarkOrNull(invalidZhuyinToneMark).shouldBeNull()
        }
    }

    context("throw exception when converting from invalid digit ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidDigit ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromDigit(invalidDigit)
            }
        }
    }

    context("throw exception when converting from invalid int ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            0,
            6,
            -1,
            Int.MAX_VALUE,
            Int.MIN_VALUE,
        ) { invalidNumber ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromInt(invalidNumber)
            }
        }
    }

    context("throw exception when converting from invalid Zhuyin tone mark ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidZhuyinToneMark ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromZhuyinToneMark(invalidZhuyinToneMark)
            }
        }
    }

    context("format to Zhuyin correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            // The first tone is expected to format to an empty string in Zhuyin.
            Tone.FIRST to "",
            Tone.SECOND to "ˊ",
            Tone.THIRD to "ˇ",
            Tone.FORTH to "ˋ",
            Tone.FIFTH to "˙",
        ) { (tone, expectedZhuyinToneMark) ->
            tone.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyinToneMark
        }
    }

    context("format to Pinyin with tone numbers correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Tone.FIRST to "1",
            Tone.SECOND to "2",
            Tone.THIRD to "3",
            Tone.FORTH to "4",
            Tone.FIFTH to "5",
        ) { (tone, expectedNumber) ->
            tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumber
        }
    }

    context("format to Pinyin with tone marks correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            // Expected values are Unicode combining diacritics, not precomposed letters.
            Tone.FIRST to "\u0304", // combining macron
            Tone.SECOND to "\u0301", // combining acute accent
            Tone.THIRD to "\u030C", // combining caron
            Tone.FORTH to "\u0300", // combining grave accent
            // The fifth (neutral) tone is expected to format to an empty string.
            Tone.FIFTH to "",
        ) { (tone, expectedAccent) ->
            tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedAccent
        }
    }
})