Compare commits
9 Commits
78020d6843
...
c6347a6b13
Author | SHA1 | Date | |
---|---|---|---|
c6347a6b13 | |||
3f8be7049b | |||
9fda246045 | |||
ebf608b2fa | |||
9d3f1ec456 | |||
200cd7d06b | |||
8592f4fe67 | |||
73e627b6be | |||
7c16182d61 |
@@ -5,7 +5,7 @@ plugins {
|
|||||||
}
|
}
|
||||||
|
|
||||||
group = "com.marvinelsen"
|
group = "com.marvinelsen"
|
||||||
version = "1.0.1"
|
version = "2.0.0"
|
||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
|
@@ -1,12 +1,12 @@
|
|||||||
package com.marvinelsen.cedict.api
|
package com.marvinelsen.cedict.api
|
||||||
|
|
||||||
import com.marvinelsen.cedict.internal.CedictParserImpl
|
import com.marvinelsen.cedict.internal.RegexCedictParser
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
interface CedictParser {
|
interface CedictParser {
|
||||||
fun parseCedict(inputStream: InputStream): List<CedictEntry>
|
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
val instance: CedictParser by lazy { CedictParserImpl() }
|
val instance: CedictParser by lazy { RegexCedictParser() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
|
|||||||
import com.marvinelsen.chinese.transliteration.Syllable
|
import com.marvinelsen.chinese.transliteration.Syllable
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
internal class CedictParserImpl : CedictParser {
|
internal class RegexCedictParser : CedictParser {
|
||||||
companion object {
|
companion object {
|
||||||
private const val DEFINITION_SEPARATOR = '/'
|
private const val DEFINITION_SEPARATOR = '/'
|
||||||
private const val GLOSS_SEPARATOR = ';'
|
private const val GLOSS_SEPARATOR = ';'
|
||||||
@@ -20,11 +20,11 @@ internal class CedictParserImpl : CedictParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override fun parseCedict(inputStream: InputStream) =
|
override fun parseCedict(inputStream: InputStream) =
|
||||||
inputStream.bufferedReader().useLines { lines ->
|
inputStream
|
||||||
lines.filterNot(::isComment)
|
.bufferedReader()
|
||||||
|
.lineSequence()
|
||||||
|
.filterNot(::isComment)
|
||||||
.map(::toCedictEntry)
|
.map(::toCedictEntry)
|
||||||
.toList()
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun isComment(line: String) = line[0] == COMMENT_MARKER
|
private fun isComment(line: String) = line[0] == COMMENT_MARKER
|
||||||
|
|
||||||
@@ -48,9 +48,5 @@ internal class CedictParserImpl : CedictParser {
|
|||||||
|
|
||||||
private fun toCedictDefinitions(definitions: String) = definitions
|
private fun toCedictDefinitions(definitions: String) = definitions
|
||||||
.split(DEFINITION_SEPARATOR)
|
.split(DEFINITION_SEPARATOR)
|
||||||
.map {
|
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
|
||||||
CedictDefinition(
|
|
||||||
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
@@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
|
|||||||
import io.kotest.matchers.shouldBe
|
import io.kotest.matchers.shouldBe
|
||||||
import java.util.zip.GZIPInputStream
|
import java.util.zip.GZIPInputStream
|
||||||
|
|
||||||
class CedictParserImplTest : ShouldSpec({
|
class RegexCedictParserTest : ShouldSpec({
|
||||||
should("parse lines correctly") {
|
should("parse lines correctly") {
|
||||||
val cedictParser = CedictParserImpl()
|
val cedictParser = RegexCedictParser()
|
||||||
val cedictEntry =
|
val cedictEntry =
|
||||||
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
|
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
|
||||||
|
|
||||||
@@ -29,9 +29,12 @@ class CedictParserImplTest : ShouldSpec({
|
|||||||
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
|
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
|
||||||
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
|
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
|
||||||
|
|
||||||
val cedictParser = CedictParserImpl()
|
val cedictParser = RegexCedictParser()
|
||||||
|
|
||||||
|
cedictFileStream.use {
|
||||||
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
|
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
|
||||||
|
|
||||||
cedictEntries.size shouldBe 122_508
|
cedictEntries.toList().size shouldBe 122_508
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
Loading…
Reference in New Issue
Block a user