Compare commits

...

8 Commits

Author SHA1 Message Date
3f8be7049b
Make toCedictDefinitions method more concise
All checks were successful
Pull Request / build (pull_request) Successful in 2m16s
2024-09-24 12:05:57 +02:00
9fda246045
Close stream properly in test 2024-09-24 12:05:57 +02:00
ebf608b2fa
Rename test 2024-09-24 12:05:57 +02:00
9d3f1ec456
Rename CedictParserImpl to RegexCedictParser 2024-09-24 12:05:57 +02:00
200cd7d06b
Fix test 2024-09-24 12:05:57 +02:00
8592f4fe67
Bump version 2024-09-24 12:05:57 +02:00
73e627b6be
Return sequence instead of list from parse method 2024-09-24 12:05:57 +02:00
7c16182d61
Do not close input stream in parse method 2024-09-24 12:05:56 +02:00
4 changed files with 19 additions and 20 deletions

View File

@@ -5,7 +5,7 @@ plugins {
}
group = "com.marvinelsen"
version = "1.0.1"
version = "2.0.0"
repositories {
mavenCentral()

View File

@@ -1,12 +1,12 @@
package com.marvinelsen.cedict.api
import com.marvinelsen.cedict.internal.CedictParserImpl
import com.marvinelsen.cedict.internal.RegexCedictParser
import java.io.InputStream
interface CedictParser {
fun parseCedict(inputStream: InputStream): List<CedictEntry>
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
companion object {
val instance: CedictParser by lazy { CedictParserImpl() }
val instance: CedictParser by lazy { RegexCedictParser() }
}
}

View File

@@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
import com.marvinelsen.chinese.transliteration.Syllable
import java.io.InputStream
internal class CedictParserImpl : CedictParser {
internal class RegexCedictParser : CedictParser {
companion object {
private const val DEFINITION_SEPARATOR = '/'
private const val GLOSS_SEPARATOR = ';'
@@ -20,11 +20,11 @@ internal class CedictParserImpl : CedictParser {
}
override fun parseCedict(inputStream: InputStream) =
inputStream.bufferedReader().useLines { lines ->
lines.filterNot(::isComment)
.map(::toCedictEntry)
.toList()
}
inputStream
.bufferedReader()
.lineSequence()
.filterNot(::isComment)
.map(::toCedictEntry)
private fun isComment(line: String) = line[0] == COMMENT_MARKER
@@ -48,9 +48,5 @@ internal class CedictParserImpl : CedictParser {
private fun toCedictDefinitions(definitions: String) = definitions
.split(DEFINITION_SEPARATOR)
.map {
CedictDefinition(
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
)
}
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
}

View File

@@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe
import java.util.zip.GZIPInputStream
class CedictParserImplTest : ShouldSpec({
class RegexCedictParserTest : ShouldSpec({
should("parse lines correctly") {
val cedictParser = CedictParserImpl()
val cedictParser = RegexCedictParser()
val cedictEntry =
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
@@ -29,9 +29,12 @@ class CedictParserImplTest : ShouldSpec({
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
val cedictParser = CedictParserImpl()
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
val cedictParser = RegexCedictParser()
cedictEntries.size shouldBe 122_508
cedictFileStream.use {
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
cedictEntries.toList().size shouldBe 122_508
}
}
})