aboutsummaryrefslogtreecommitdiff
path: root/src/windows/mod.rs
blob: 3b6105b2750847aaa2a52b435f91c57bf0aa1bf2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// These functions are necessarily inefficient, because they must revert
// encoding conversions performed by the standard library. However, there is
// currently no better alternative.

use std::borrow::Cow;
use std::error::Error;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::os::windows::ffi::OsStrExt;
use std::os::windows::ffi::OsStringExt;
use std::result;
use std::str;

if_raw_str! {
    pub(super) mod raw;
}

mod wtf8;
use wtf8::encode_wide;
use wtf8::DecodeWide;

/// The reason a byte sequence could not be converted to the platform
/// encoding.
///
/// Each variant records where the conversion failed; `position` renders that
/// location for the `Display` message.
#[derive(Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
    /// Failure reported at a byte; displayed as `byte b'\xNN'`.
    Byte(u8),
    /// Failure reported at a code point; displayed as `code point U+NNNN`.
    CodePoint(u32),
    /// Failure reported at the end of the string.
    End(),
}

impl EncodingError {
    /// Returns a human-readable description of where the error occurred.
    ///
    /// The end-of-string case borrows a static string; the other variants
    /// allocate, since they embed the offending value in hexadecimal.
    fn position(&self) -> Cow<'_, str> {
        let description = match *self {
            // Nothing to format here, so no allocation is needed.
            Self::End() => return Cow::Borrowed("end of string"),
            Self::Byte(byte) => format!("byte b'\\x{:02X}'", byte),
            Self::CodePoint(code_point) => {
                format!("code point U+{:04X}", code_point)
            }
        };
        Cow::Owned(description)
    }
}

impl Display for EncodingError {
    /// Writes a fixed explanation followed by the location returned by
    /// `position`.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
        // The prefix never varies, so it is written verbatim; only the
        // position needs formatting.
        formatter.write_str(
            "byte sequence is not representable in the platform encoding; ",
        )?;
        write!(formatter, "error at {}", self.position())
    }
}

// No methods are overridden; the trait's provided defaults are used, with
// `Debug` coming from the derive and `Display` from the impl in this file.
impl Error for EncodingError {}

/// Shorthand for a `Result` whose error type is always [`EncodingError`].
type Result<T> = result::Result<T, EncodingError>;

fn from_bytes(string: &[u8]) -> Result<OsString> {
    let encoder = encode_wide(string);

    // Collecting an iterator into a result ignores the size hint:
    // https://github.com/rust-lang/rust/issues/48994
    let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
    for wchar in encoder {
        encoded_string.push(wchar?);
    }
    Ok(OsStringExt::from_wide(&encoded_string))
}

fn to_bytes(os_string: &OsStr) -> Vec<u8> {
    let encoder = OsStrExt::encode_wide(os_string);

    let mut string = Vec::with_capacity(encoder.size_hint().0);
    string.extend(DecodeWide::new(encoder));
    string
}

/// Converts `string` into an `OsStr` using `from_bytes`.
///
/// On success the result is always `Cow::Owned`, because `from_bytes`
/// returns a newly built `OsString`.
///
/// # Errors
///
/// Propagates the [`EncodingError`] returned by `from_bytes`.
pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
    let os_string = from_bytes(string)?;
    Ok(Cow::Owned(os_string))
}

/// Converts `os_string` into bytes using `to_bytes`.
///
/// The result is always owned, because `to_bytes` returns a new `Vec<u8>`.
pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
    let bytes = to_bytes(os_string);
    bytes.into()
}

/// Converts an owned byte vector into an `OsString` using `from_bytes`.
///
/// The vector is only read; `from_bytes` builds a separate `OsString`.
///
/// # Errors
///
/// Propagates the [`EncodingError`] returned by `from_bytes`.
pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
    from_bytes(string.as_slice())
}

/// Converts an owned `OsString` into bytes using `to_bytes`.
///
/// The string is only read; `to_bytes` builds a separate vector.
pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
    to_bytes(os_string.as_os_str())
}

#[cfg(test)]
mod tests {
    use std::ffi::OsStr;

    use crate::OsStrBytes;

    use super::EncodingError;

    /// Asserts that converting `bytes` with `OsStr::from_raw_bytes` fails
    /// with exactly the `expected` error.
    fn expect_error(expected: EncodingError, bytes: &[u8]) {
        let result = OsStr::from_raw_bytes(bytes).map_err(|error| error.0);
        assert_eq!(Err(expected), result);
    }

    /// Checks the error reported for a table of byte strings that must be
    /// rejected.
    #[test]
    fn test_invalid() {
        use EncodingError::Byte;
        use EncodingError::CodePoint;
        use EncodingError::End;

        expect_error(Byte(b'\x83'), b"\x0C\x83\xD7\x3E");
        expect_error(Byte(b'\x52'), b"\x19\xF7\x52\x84");
        expect_error(Byte(b'\xB8'), b"\x70\xB8\x1F\x66");
        expect_error(CodePoint(0x34_0388), b"\x70\xFD\x80\x8E\x88");
        expect_error(Byte(b'\x80'), b"\x80");
        expect_error(Byte(b'\x80'), b"\x80\x80");
        expect_error(Byte(b'\x80'), b"\x80\x80\x80");
        expect_error(Byte(b'\x81'), b"\x81");
        expect_error(Byte(b'\x88'), b"\x88\xB4\xC7\x46");
        expect_error(Byte(b'\x97'), b"\x97\xCE\x06");
        expect_error(Byte(b'\x00'), b"\xC2\x00");
        expect_error(Byte(b'\x7F'), b"\xC2\x7F");
        expect_error(Byte(b'\x09'), b"\xCD\x09\x95");
        expect_error(Byte(b'\x43'), b"\xCD\x43\x5F\xA0");
        expect_error(Byte(b'\x69'), b"\xD7\x69\xB2");
        expect_error(CodePoint(0x528), b"\xE0\x94\xA8");
        expect_error(CodePoint(0x766), b"\xE0\x9D\xA6\x12\xAE");
        expect_error(Byte(b'\xFD'), b"\xE2\xAB\xFD\x51");
        expect_error(Byte(b'\xC4'), b"\xE3\xC4");
        expect_error(CodePoint(0xDC00), b"\xED\xA0\x80\xED\xB0\x80");
        expect_error(End(), b"\xF1");
        expect_error(End(), b"\xF1\x80");
        expect_error(End(), b"\xF1\x80\x80");
        expect_error(Byte(b'\xF1'), b"\xF1\x80\x80\xF1");
        expect_error(CodePoint(0x11_09CC), b"\xF4\x90\xA7\x8C");
        expect_error(CodePoint(0x15_EC46), b"\xF5\x9E\xB1\x86");
        expect_error(End(), b"\xFB");
        expect_error(End(), b"\xFB\x80");
        expect_error(End(), b"\xFB\x80\x80");
        expect_error(CodePoint(0x2C_0000), b"\xFB\x80\x80\x80");
        expect_error(End(), b"\xFF");
        expect_error(End(), b"\xFF\x80");
        expect_error(End(), b"\xFF\x80\x80");
        expect_error(CodePoint(0x3C_0000), b"\xFF\x80\x80\x80");
        expect_error(CodePoint(0x3C_6143), b"\xFF\x86\x85\x83");
    }
}