rufus/src/parser.c

533 lines
16 KiB
C
Raw Normal View History

/*
* Rufus: The Reliable USB Formatting Utility
* Elementary Unicode compliant find/replace parser
* Copyright (c) 2012 Pete Batard <pete@akeo.ie>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Memory leaks detection - define _CRTDBG_MAP_ALLOC as preprocessor macro */
#ifdef _CRTDBG_MAP_ALLOC
#include <stdlib.h>
#include <crtdbg.h>
#endif
#include <windows.h>
#include <stdio.h>
#include <wchar.h>
#include <string.h>
#include <malloc.h>
#include <io.h>
#include <fcntl.h>
#include "rufus.h"
#include "msapi_utf8.h"
typedef struct {
uint8_t version[4];
char* type; // "release", "beta", "notice"
char* platform; // target platform ("windows", "linux", etc.)
char* platform_arch; // "x86", "x64", "arm"
char* platform_min; // minimum platform version required
char* download_url[2];
char* release_notes;
} rufus_update;
// Parse a line of UTF-16 text and return the data if it matches the 'token'
// The parsed line is of the form: [ ]token[ ]=[ ]["]data["][ ] and the line
// is modified by the parser
static wchar_t* get_token_data_line(const wchar_t* wtoken, wchar_t* wline)
{
const wchar_t wspace[] = L" \t"; // The only whitespaces we recognize as such
const wchar_t weol[] = L"\r\n";
size_t i, r;
2012-11-30 00:18:28 +00:00
BOOLEAN quoteth = FALSE;
if ((wtoken == NULL) || (wline == NULL) || (wline[0] == 0))
return NULL;
// Eliminate trailing EOL characters
wline[wcscspn(wline, weol)] = 0;
i = 0;
// Skip leading spaces
i += wcsspn(&wline[i], wspace);
// Our token should begin a line
if (_wcsnicmp(&wline[i], wtoken, wcslen(wtoken)) != 0)
return NULL;
// Token was found, move past token
i += wcslen(wtoken);
// Skip spaces
i += wcsspn(&wline[i], wspace);
// Check for an equal sign
if (wline[i] != L'=')
return NULL;
i++;
// Skip spaces after equal sign
i += wcsspn(&wline[i], wspace);
// eliminate leading quote, if it exists
if (wline[i] == L'"') {
quoteth = TRUE;
i++;
}
// Keep the starting pos of our data
r = i;
// locate end of string or quote
while ( (wline[i] != 0) && ((wline[i] != L'"') || ((wline[i] == L'"') && (!quoteth))) )
i++;
wline[i] = 0;
return (wline[r] == 0)?NULL:&wline[r];
}
// Parse a file (ANSI or UTF-8 or UTF-16) and return the data for the first occurence of 'token'
// The returned string is UTF-8 and MUST be freed by the caller
char* get_token_data_file(const char* token, const char* filename)
{
wchar_t *wtoken = NULL, *wdata= NULL, *wfilename = NULL;
wchar_t buf[1024];
FILE* fd = NULL;
char *ret = NULL;
if ((filename == NULL) || (token == NULL))
return NULL;
if ((filename[0] == 0) || (token[0] == 0))
return NULL;
wfilename = utf8_to_wchar(filename);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", filename);
goto out;
}
wtoken = utf8_to_wchar(token);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", token);
goto out;
}
fd = _wfopen(wfilename, L"r, ccs=UNICODE");
if (fd == NULL) goto out;
// Process individual lines. NUL is always appended.
// Ideally, we'd check that our buffer fits the line
while (fgetws(buf, ARRAYSIZE(buf), fd) != NULL) {
wdata = get_token_data_line(wtoken, buf);
if (wdata != NULL) {
ret = wchar_to_utf8(wdata);
break;
}
}
out:
if (fd != NULL)
fclose(fd);
safe_free(wfilename);
safe_free(wtoken);
return ret;
}
// Parse a buffer (ANSI or UTF-8) and return the data for the 'n'th occurence of 'token'
// The returned string is UTF-8 and MUST be freed by the caller
char* get_token_data_buffer(const char* token, unsigned int n, const char* buffer, size_t buffer_size)
{
unsigned int j;
wchar_t *wtoken = NULL, *wdata = NULL, *wbuffer = NULL, *wline = NULL;
size_t i;
BOOL done = FALSE;
char* ret = NULL;
// We're handling remote data => better safe than sorry
if ((token == NULL) || (buffer == NULL) || (buffer_size <= 4) || (buffer_size > 65536))
goto out;
// Ensure that our buffer is NUL terminated
if (buffer[buffer_size-1] != 0)
goto out;
wbuffer = utf8_to_wchar(buffer);
wtoken = utf8_to_wchar(token);
if ((wbuffer == NULL) || (wtoken == NULL))
goto out;
// Process individual lines
for (i=0,j=0,done=FALSE; (j!=n)&&(!done); ) {
wline = &wbuffer[i];
for(;(wbuffer[i]!=L'\n')&&(wbuffer[i]!=L'\r')&&(wbuffer[i]!=0);i++);
if (wbuffer[i]==0) {
done = TRUE;
} else {
wbuffer[i++] = 0;
}
wdata = get_token_data_line(wtoken, wline);
if (wdata != NULL) {
j++;
}
}
out:
if (wdata != NULL)
ret = wchar_to_utf8(wdata);
safe_free(wbuffer);
safe_free(wtoken);
return ret;
}
static __inline char* get_sanitized_token_data_buffer(const char* token, unsigned int n, const char* buffer, size_t buffer_size)
{
size_t i;
char* data = get_token_data_buffer(token, n, buffer, buffer_size);
if (data != NULL) {
for (i=0; i<safe_strlen(data); i++) {
if ((data[i] == '\\') && (data[i+1] == 'n')) {
data[i] = '\r';
data[i+1] = '\n';
}
}
}
return data;
}
// Parse an update data file and populates a rufus_update structure.
// NB: since this is remote data, and we're running elevated, it *IS* considered
// potentially malicious, even if it comes from a supposedly trusted server.
// len should be the size of the buffer - 1, for the zero terminator
void parse_update(char* buf, size_t len)
{
size_t i;
char *data = NULL, *token;
char allowed_chars[] = " \t\r\nabcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"$%^&*()-_+=<>(){}[].,:;#@'/?|~";
rufus_update update;
if ((buf == NULL) || (len < 2) || (len > 65536) || (buf[len-1] != 0))
return;
// Sanitize the data - Of course not a silver bullet, but it helps
for (i=0; i<len-1; i++) {
// Do not sanitize \n yet
// NB: we have a zero terminator, so we can afford a +1 without overflow
if ((strchr(allowed_chars, buf[i]) == NULL) && (buf[i] != '\\') && (buf[i+1] != 'n')) {
buf[i] = ' ';
}
}
2012-11-30 00:18:28 +00:00
for (i=0; i<4; i++) update.version[i] = 0;
if ((data = get_sanitized_token_data_buffer("version", 1, buf, len)) != NULL) {
for (i=0; (i<4) && ((token = strtok((i==0)?data:NULL, ".")) != NULL); i++) {
update.version[i] = (uint8_t)atoi(token);
}
safe_free(data);
}
// TODO: use X-Macros?
update.type = get_sanitized_token_data_buffer("type", 1, buf, len);
update.platform = get_sanitized_token_data_buffer("platform", 1, buf, len);
update.platform_arch = get_sanitized_token_data_buffer("platform_arch", 1, buf, len);
update.platform_min = get_sanitized_token_data_buffer("platform_min", 1, buf, len);
for (i=0; i<ARRAYSIZE(update.download_url); i++) {
update.download_url[i] = get_sanitized_token_data_buffer("download_url", (unsigned int)i+1, buf, len);
}
update.release_notes = get_sanitized_token_data_buffer("release_notes", 1, buf, len);
uprintf("UPDATE DATA:\n");
uprintf(" version: %d.%d.%d.%d\n", update.version[0], update.version[1], update.version[2], update.version[3]);
uprintf(" platform: %s\r\n platform_arch: %s\r\n platform_min: %s\n", update.platform, update.platform_arch, update.platform_min);
for (i=0; i<ARRAYSIZE(update.download_url); i++) {
uprintf(" url%d: %s\n", i+1, update.download_url[i]);
}
uprintf("RELEASE NOTES:\r\n%s\n", update.release_notes);
// TODO: free all these strings!
}
// Insert entry 'data' under section 'section' of a config file
// Section must include the relevant delimitors (eg '[', ']') if needed
char* insert_section_data(const char* filename, const char* section, const char* data, BOOL dos2unix)
{
const wchar_t* outmode[] = { L"w", L"w, ccs=UTF-8", L"w, ccs=UTF-16LE" };
wchar_t *wsection = NULL, *wfilename = NULL, *wtmpname = NULL, *wdata = NULL, bom = 0;
wchar_t wspace[] = L" \t";
wchar_t buf[1024];
FILE *fd_in = NULL, *fd_out = NULL;
size_t i, size;
int mode;
char *ret = NULL, tmp[2];
if ((filename == NULL) || (section == NULL) || (data == NULL))
return NULL;
if ((filename[0] == 0) || (section[0] == 0) || (data[0] == 0))
return NULL;
wfilename = utf8_to_wchar(filename);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", filename);
goto out;
}
wsection = utf8_to_wchar(section);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", section);
goto out;
}
wdata = utf8_to_wchar(data);
if (wdata == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", data);
goto out;
}
fd_in = _wfopen(wfilename, L"r, ccs=UNICODE");
if (fd_in == NULL) {
uprintf("Could not open file '%s'\n", filename);
goto out;
}
// Check the input file's BOM and create an output file with the same
fread(&bom, sizeof(bom), 1, fd_in);
switch(bom) {
case 0xFEFF:
mode = 2; // UTF-16 (LE)
break;
case 0xBBEF: // Yeah, the UTF-8 BOM is really 0xEF,0xBB,0xBF, but
mode = 1; // find me a non UTF-8 file that actually begins with "<22><>"
break;
default:
mode = 0; // ANSI
break;
}
fseek(fd_in, 0, SEEK_SET);
// uprintf("'%s' was detected as %s\n", filename,
// (mode==0)?"ANSI/UTF8 (no BOM)":((mode==1)?"UTF8 (with BOM)":"UTF16 (with BOM"));
wtmpname = (wchar_t*)calloc(wcslen(wfilename)+2, sizeof(wchar_t));
if (wtmpname == NULL) {
uprintf("Could not allocate space for temporary output name\n");
goto out;
}
wcscpy(wtmpname, wfilename);
wtmpname[wcslen(wtmpname)] = '~';
fd_out = _wfopen(wtmpname, outmode[mode]);
if (fd_out == NULL) {
uprintf("Could not open temporary output file %s~\n", filename);
goto out;
}
// Process individual lines. NUL is always appended.
while (fgetws(buf, ARRAYSIZE(buf), fd_in) != NULL) {
i = 0;
// Skip leading spaces
i += wcsspn(&buf[i], wspace);
// Our token should begin a line
if (_wcsnicmp(&buf[i], wsection, wcslen(wsection)) != 0) {
fputws(buf, fd_out);
continue;
}
// Section was found, output it
fputws(buf, fd_out);
// Now output the new data
fwprintf(fd_out, L"%s\n", wdata);
ret = (char*)data;
}
out:
if (fd_in != NULL) fclose(fd_in);
if (fd_out != NULL) fclose(fd_out);
// If an insertion occured, delete existing file and use the new one
if (ret != NULL) {
// We're in Windows text mode => Remove CRs if requested
fd_in = _wfopen(wtmpname, L"rb");
fd_out = _wfopen(wfilename, L"wb");
// Don't check fds
if ((fd_in != NULL) && (fd_out != NULL)) {
size = (mode==2)?2:1;
while(fread(tmp, size, 1, fd_in) == 1) {
if ((!dos2unix) || (tmp[0] != 0x0D))
fwrite(tmp, size, 1, fd_out);
}
fclose(fd_in);
fclose(fd_out);
} else {
uprintf("Could not write %s - original file has been left unmodifiedn", filename);
ret = NULL;
if (fd_in != NULL) fclose(fd_in);
if (fd_out != NULL) fclose(fd_out);
}
}
_wunlink(wtmpname);
safe_free(wfilename);
safe_free(wtmpname);
safe_free(wsection);
safe_free(wdata);
return ret;
}
// Search for a specific 'src' substring the data for all occurences of 'token', and replace
// it with 'rep'. File can be ANSI or UNICODE and is overwritten. Parameters are UTF-8.
// The parsed line is of the form: [ ]token[ ]data
// Returns a pointer to rep if replacement occured, NULL otherwise
char* replace_in_token_data(const char* filename, const char* token, const char* src, const char* rep, BOOL dos2unix)
{
const wchar_t* outmode[] = { L"w", L"w, ccs=UTF-8", L"w, ccs=UTF-16LE" };
wchar_t *wtoken = NULL, *wfilename = NULL, *wtmpname = NULL, *wsrc = NULL, *wrep = NULL, bom = 0;
wchar_t wspace[] = L" \t";
wchar_t buf[1024], *torep;
FILE *fd_in = NULL, *fd_out = NULL;
size_t i, size;
int mode;
char *ret = NULL, tmp[2];
if ((filename == NULL) || (token == NULL) || (src == NULL) || (rep == NULL))
return NULL;
if ((filename[0] == 0) || (token[0] == 0) || (src[0] == 0) || (rep[0] == 0))
return NULL;
if (strcmp(src, rep) == 0) // No need for processing is source is same as replacement
return NULL;
wfilename = utf8_to_wchar(filename);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", filename);
goto out;
}
wtoken = utf8_to_wchar(token);
if (wfilename == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", token);
goto out;
}
wsrc = utf8_to_wchar(src);
if (wsrc == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", src);
goto out;
}
wrep = utf8_to_wchar(rep);
if (wsrc == NULL) {
uprintf("Could not convert '%s' to UTF-16\n", rep);
goto out;
}
fd_in = _wfopen(wfilename, L"r, ccs=UNICODE");
if (fd_in == NULL) {
uprintf("Could not open file '%s'\n", filename);
goto out;
}
// Check the input file's BOM and create an output file with the same
fread(&bom, sizeof(bom), 1, fd_in);
switch(bom) {
case 0xFEFF:
mode = 2; // UTF-16 (LE)
break;
case 0xBBEF: // Yeah, the UTF-8 BOM is really 0xEF,0xBB,0xBF, but
mode = 1; // find me a non UTF-8 file that actually begins with "<22><>"
break;
default:
mode = 0; // ANSI
break;
}
fseek(fd_in, 0, SEEK_SET);
// uprintf("'%s' was detected as %s\n", filename,
// (mode==0)?"ANSI/UTF8 (no BOM)":((mode==1)?"UTF8 (with BOM)":"UTF16 (with BOM"));
wtmpname = (wchar_t*)calloc(wcslen(wfilename)+2, sizeof(wchar_t));
if (wtmpname == NULL) {
uprintf("Could not allocate space for temporary output name\n");
goto out;
}
wcscpy(wtmpname, wfilename);
wtmpname[wcslen(wtmpname)] = '~';
fd_out = _wfopen(wtmpname, outmode[mode]);
if (fd_out == NULL) {
uprintf("Could not open temporary output file %s~\n", filename);
goto out;
}
// Process individual lines. NUL is always appended.
while (fgetws(buf, ARRAYSIZE(buf), fd_in) != NULL) {
i = 0;
// Skip leading spaces
i += wcsspn(&buf[i], wspace);
// Our token should begin a line
if (_wcsnicmp(&buf[i], wtoken, wcslen(wtoken)) != 0) {
fputws(buf, fd_out);
continue;
}
// Token was found, move past token
i += strlen(token);
// Skip spaces
i += wcsspn(&buf[i], wspace);
torep = wcsstr(&buf[i], wsrc);
if (torep == NULL) {
fputws(buf, fd_out);
continue;
}
i = (torep-buf) + wcslen(wsrc);
*torep = 0;
fwprintf(fd_out, L"%s%s%s", buf, wrep, &buf[i]);
ret = (char*)rep;
}
out:
if (fd_in != NULL) fclose(fd_in);
if (fd_out != NULL) fclose(fd_out);
// If a replacement occured, delete existing file and use the new one
if (ret != NULL) {
// We're in Windows text mode => Remove CRs if requested
fd_in = _wfopen(wtmpname, L"rb");
fd_out = _wfopen(wfilename, L"wb");
// Don't check fds
if ((fd_in != NULL) && (fd_out != NULL)) {
size = (mode==2)?2:1;
while(fread(tmp, size, 1, fd_in) == 1) {
if ((!dos2unix) || (tmp[0] != 0x0D))
fwrite(tmp, size, 1, fd_out);
}
fclose(fd_in);
fclose(fd_out);
} else {
uprintf("Could not write %s - original file has been left unmodified.\n", filename);
ret = NULL;
if (fd_in != NULL) fclose(fd_in);
if (fd_out != NULL) fclose(fd_out);
}
}
_wunlink(wtmpname);
safe_free(wfilename);
safe_free(wtmpname);
safe_free(wtoken);
safe_free(wsrc);
safe_free(wrep);
return ret;
}