Program Listing for File latex.ipp
↰ Return to documentation for file (SeQuant/core/latex.ipp)
//
// Created by Eduard Valeyev on 3/30/18.
//
#ifndef SEQUANT_CORE_LATEX_IPP
#define SEQUANT_CORE_LATEX_IPP
#include <SeQuant/core/latex.hpp>
#include <SeQuant/core/wstring.hpp>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>
namespace sequant {
namespace detail {
template <typename Char, typename Traits, typename Alloc>
std::basic_string<Char, Traits, Alloc> greek_characters_to_latex_impl(
std::basic_string_view<Char, Traits> str) {
// lower-case greek characters in the order of their appearance in Unicode
// chart https://www.unicode.org/charts/PDF/U0370.pdf
const std::vector<std::basic_string<Char, Traits, Alloc>> lc = {
SQ_STRLIT(Char, "\\alpha"), SQ_STRLIT(Char, "\\beta"),
SQ_STRLIT(Char, "\\gamma"), SQ_STRLIT(Char, "\\delta"),
SQ_STRLIT(Char, "\\epsilon"), SQ_STRLIT(Char, "\\zeta"),
SQ_STRLIT(Char, "\\eta"), SQ_STRLIT(Char, "\\theta"),
SQ_STRLIT(Char, "\\iota"), SQ_STRLIT(Char, "\\kappa"),
SQ_STRLIT(Char, "\\lambda"), SQ_STRLIT(Char, "\\mu"),
SQ_STRLIT(Char, "\\nu"), SQ_STRLIT(Char, "\\xi"),
SQ_STRLIT(Char, "o"), SQ_STRLIT(Char, "\\pi"),
SQ_STRLIT(Char, "\\rho"), SQ_STRLIT(Char, "\\varsigma"),
SQ_STRLIT(Char, "\\sigma"), SQ_STRLIT(Char, "\\tau"),
SQ_STRLIT(Char, "\\upsilon"), SQ_STRLIT(Char, "\\phi"),
SQ_STRLIT(Char, "\\chi"), SQ_STRLIT(Char, "\\psi"),
SQ_STRLIT(Char, "\\omega")};
const std::vector<std::basic_string<Char, Traits, Alloc>> uc = {
SQ_STRLIT(Char, "A"), SQ_STRLIT(Char, "B"),
SQ_STRLIT(Char, "\\Gamma"), SQ_STRLIT(Char, "\\Delta"),
SQ_STRLIT(Char, "E"), SQ_STRLIT(Char, "Z"),
SQ_STRLIT(Char, "H"), SQ_STRLIT(Char, "\\Theta"),
SQ_STRLIT(Char, "I"), SQ_STRLIT(Char, "K"),
SQ_STRLIT(Char, "\\Lambda"), SQ_STRLIT(Char, "M"),
SQ_STRLIT(Char, "N"), SQ_STRLIT(Char, "\\Xi"),
SQ_STRLIT(Char, "O"), SQ_STRLIT(Char, "\\Pi"),
SQ_STRLIT(Char, "P"), SQ_STRLIT(Char, ""),
SQ_STRLIT(Char, "\\Sigma"), SQ_STRLIT(Char, "T"),
SQ_STRLIT(Char, "\\Upsilon"), SQ_STRLIT(Char, "\\Phi"),
SQ_STRLIT(Char, "X"), SQ_STRLIT(Char, "\\Psi"),
SQ_STRLIT(Char, "\\Omega")};
auto is_lc = [](Char ch) {
return ch >= static_cast<Char>(0x3B1) // alpha
&& ch <= static_cast<Char>(0x3C9) // omega
;
};
auto is_uc = [](Char ch) {
return ch >= static_cast<Char>(0x391) // Alpha
&& ch <= static_cast<Char>(0x3A9) // Omega
&& ch != static_cast<Char>(0x3A2) // gap in the table
;
};
std::basic_string<Char, Traits, Alloc> result;
const auto begin = cbegin(str);
const auto end = cend(str);
for (auto it = begin; it != end; ++it) {
auto append = [&result, &str, &it, &begin](const auto& s) {
if (result.empty()) result = str.substr(0, it - begin);
result += s;
};
auto is_ascii = [](Char c) { return static_cast<unsigned int>(c) <= 0x7F; };
const Char ch = *it;
if (sizeof(Char) == 1 && !is_ascii(ch))
throw std::invalid_argument(
"greek_characters_to_latex<Char,...>(str): currently only supports "
"non-ASCII characters in str if Char is a wide character (wchar_t, "
"char16_t, or char32_t)");
// skip ASCII characters
if (!is_ascii(ch)) {
if (is_lc(ch)) {
const auto ch_addr = static_cast<long>(ch) - static_cast<long>(0x3B1);
assert(ch_addr >= 0 && static_cast<std::size_t>(ch_addr) < lc.size());
const auto& lc_str = lc[static_cast<std::size_t>(ch_addr)];
assert(lc_str.size() > 0);
append(lc_str);
} else if (is_uc(ch)) {
const auto ch_addr = static_cast<long>(ch) - static_cast<long>(0x391);
assert(ch_addr >= 0 && static_cast<std::size_t>(ch_addr) < uc.size());
const auto& uc_str = uc[static_cast<std::size_t>(ch_addr)];
assert(uc_str.size() > 0);
append(uc_str);
} else { // pass through unknown non-ASCII characters
if (!result.empty()) result.push_back(ch);
}
} else { // ASCII character
if (!result.empty()) result.push_back(ch);
}
}
if (!result.empty())
return result;
else
return decltype(result)(str);
}
template <typename Char, typename Traits, typename Alloc>
std::basic_string<Char, Traits, Alloc> diactrics_to_latex_impl(
std::basic_string_view<Char, Traits> str) {
using str_t = std::basic_string<Char, Traits, Alloc>;
str_t result;
const auto begin = cbegin(str);
const auto end = cend(str);
std::optional<Char> next_ch;
for (auto it = begin; it != end; ++it) {
auto append = [&result, &str, &it, &begin](const auto& s) {
if (result.empty()) result = str.substr(0, it - begin);
result += s;
};
auto is_ascii = [](Char c) { return static_cast<unsigned int>(c) <= 0x7F; };
const Char ch = *it;
if (it + 1 != end) next_ch = *(it + 1);
if (sizeof(Char) == 1 &&
((it == begin && !is_ascii(ch)) || (next_ch && !is_ascii(*next_ch)))) {
throw std::invalid_argument(
"diactrics_to_latex<Char,...>(str): currently only supports "
"non-ASCII characters in str if Char is a wide character (wchar_t, "
"char16_t, or char32_t)");
}
// Combining diacritics:
// https://www.ncbi.nlm.nih.gov/staff/beck/charents/accents.html
if (next_ch && !is_ascii(*next_ch)) {
auto append_latex = [&](auto prefix) {
append(prefix);
append(ch);
append(SQ_STRLIT(Char, "}"));
it += 1;
};
switch (static_cast<std::int64_t>(*next_ch)) {
// tilde
case 0x303:
append_latex(SQ_STRLIT(Char, "\\tilde{"));
continue;
// acute
case 0x301:
append_latex(SQ_STRLIT(Char, "\\acute{"));
continue;
// grave
case 0x300:
append_latex(SQ_STRLIT(Char, "\\grave{"));
continue;
// caron
case 0x30C:
append_latex(SQ_STRLIT(Char, "\\check{"));
continue;
}
}
if (!is_ascii(ch)) { // check for combined characters
{ // tilde
// lower-case characters with tilde
const std::map<str_t, str_t> lc = {
{SQ_STRLIT(Char, "ã"), SQ_STRLIT(Char, "\\tilde{a}")},
{SQ_STRLIT(Char, "ẽ"), SQ_STRLIT(Char, "\\tilde{e}")},
{SQ_STRLIT(Char, "ñ"), SQ_STRLIT(Char, "\\tilde{n}")},
{SQ_STRLIT(Char, "õ"), SQ_STRLIT(Char, "\\tilde{o}")},
{SQ_STRLIT(Char, "ũ"), SQ_STRLIT(Char, "\\tilde{u}")},
{SQ_STRLIT(Char, "ṽ"), SQ_STRLIT(Char, "\\tilde{v}")}};
auto lc_it = lc.find(str_t{ch});
if (lc_it != lc.end()) {
append(lc_it->second);
} else {
// upper-case characters with tilde
const std::map<str_t, str_t> uc = {
{SQ_STRLIT(Char, "Ã"), SQ_STRLIT(Char, "\\tilde{A}")},
{SQ_STRLIT(Char, "Ẽ"), SQ_STRLIT(Char, "\\tilde{E}")},
{SQ_STRLIT(Char, "Ñ"), SQ_STRLIT(Char, "\\tilde{N}")},
{SQ_STRLIT(Char, "Õ"), SQ_STRLIT(Char, "\\tilde{O}")},
{SQ_STRLIT(Char, "Ũ"), SQ_STRLIT(Char, "\\tilde{U}")},
{SQ_STRLIT(Char, "Ṽ"), SQ_STRLIT(Char, "\\tilde{V}")}};
auto uc_it = uc.find(str_t{ch});
if (uc_it != uc.end()) {
append(uc_it->second);
}
}
} // tilde
}
}
if (!result.empty())
return result;
else
return decltype(result)(str);
}
} // namespace detail
} // namespace sequant
#endif // SEQUANT_CORE_LATEX_IPP