Compare commits
9 Commits
78020d6843
...
c6347a6b13
Author | SHA1 | Date | |
---|---|---|---|
c6347a6b13 | |||
3f8be7049b | |||
9fda246045 | |||
ebf608b2fa | |||
9d3f1ec456 | |||
200cd7d06b | |||
8592f4fe67 | |||
73e627b6be | |||
7c16182d61 |
@ -5,7 +5,7 @@ plugins {
|
||||
}
|
||||
|
||||
group = "com.marvinelsen"
|
||||
version = "1.0.1"
|
||||
version = "2.0.0"
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
|
@ -1,12 +1,12 @@
|
||||
package com.marvinelsen.cedict.api
|
||||
|
||||
import com.marvinelsen.cedict.internal.CedictParserImpl
|
||||
import com.marvinelsen.cedict.internal.RegexCedictParser
|
||||
import java.io.InputStream
|
||||
|
||||
interface CedictParser {
|
||||
fun parseCedict(inputStream: InputStream): List<CedictEntry>
|
||||
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
|
||||
|
||||
companion object {
|
||||
val instance: CedictParser by lazy { CedictParserImpl() }
|
||||
val instance: CedictParser by lazy { RegexCedictParser() }
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
|
||||
import com.marvinelsen.chinese.transliteration.Syllable
|
||||
import java.io.InputStream
|
||||
|
||||
internal class CedictParserImpl : CedictParser {
|
||||
internal class RegexCedictParser : CedictParser {
|
||||
companion object {
|
||||
private const val DEFINITION_SEPARATOR = '/'
|
||||
private const val GLOSS_SEPARATOR = ';'
|
||||
@ -20,11 +20,11 @@ internal class CedictParserImpl : CedictParser {
|
||||
}
|
||||
|
||||
override fun parseCedict(inputStream: InputStream) =
|
||||
inputStream.bufferedReader().useLines { lines ->
|
||||
lines.filterNot(::isComment)
|
||||
inputStream
|
||||
.bufferedReader()
|
||||
.lineSequence()
|
||||
.filterNot(::isComment)
|
||||
.map(::toCedictEntry)
|
||||
.toList()
|
||||
}
|
||||
|
||||
private fun isComment(line: String) = line[0] == COMMENT_MARKER
|
||||
|
||||
@ -48,9 +48,5 @@ internal class CedictParserImpl : CedictParser {
|
||||
|
||||
private fun toCedictDefinitions(definitions: String) = definitions
|
||||
.split(DEFINITION_SEPARATOR)
|
||||
.map {
|
||||
CedictDefinition(
|
||||
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
|
||||
)
|
||||
}
|
||||
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
|
||||
}
|
@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.matchers.shouldBe
|
||||
import java.util.zip.GZIPInputStream
|
||||
|
||||
class CedictParserImplTest : ShouldSpec({
|
||||
class RegexCedictParserTest : ShouldSpec({
|
||||
should("parse lines correctly") {
|
||||
val cedictParser = CedictParserImpl()
|
||||
val cedictParser = RegexCedictParser()
|
||||
val cedictEntry =
|
||||
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
|
||||
|
||||
@ -29,9 +29,12 @@ class CedictParserImplTest : ShouldSpec({
|
||||
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
|
||||
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
|
||||
|
||||
val cedictParser = CedictParserImpl()
|
||||
val cedictParser = RegexCedictParser()
|
||||
|
||||
cedictFileStream.use {
|
||||
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
|
||||
|
||||
cedictEntries.size shouldBe 122_508
|
||||
cedictEntries.toList().size shouldBe 122_508
|
||||
}
|
||||
}
|
||||
})
|
Loading…
Reference in New Issue
Block a user