/* * Rufus: The Reliable USB Formatting Utility * Elementary Unicode compliant find/replace parser * Copyright (c) 2012 Pete Batard * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* Memory leaks detection - define _CRTDBG_MAP_ALLOC as preprocessor macro */ #ifdef _CRTDBG_MAP_ALLOC #include #include #endif #include #include #include #include #include #include #include #include "rufus.h" #include "msapi_utf8.h" // Parse a file (ANSI or UTF-8 or UTF-16) and return the data for the first occurence of 'token' // The parsed line is of the form: [ ]token[ ]=[ ]["]data["] // The returned string is UTF-8 and MUST be freed by the caller char* get_token_data(const char* filename, const char* token) { wchar_t *wtoken = NULL, *wfilename = NULL; wchar_t wspace[] = L" \t"; wchar_t weol[] = L"\r\n"; wchar_t buf[1024]; FILE* fd = NULL; size_t i, r; char *ret = NULL; if ((filename == NULL) || (token == NULL)) return NULL; if ((filename[0] == 0) || (token[0] == 0)) return NULL; wfilename = utf8_to_wchar(filename); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", filename); goto out; } wtoken = utf8_to_wchar(token); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", token); goto out; } fd = _wfopen(wfilename, L"r, ccs=UNICODE"); if (fd == NULL) goto out; // Process individual lines. NUL is always appended. // Ideally, we'd check that our buffer fits the line while (fgetws(buf, ARRAYSIZE(buf), fd) != NULL) { // Eliminate trailing EOL characters buf[wcscspn(buf, weol)] = 0; i = 0; // Skip leading spaces i += wcsspn(&buf[i], wspace); // Our token should begin a line if (_wcsnicmp(&buf[i], wtoken, wcslen(wtoken)) != 0) continue; // Token was found, move past token i += wcslen(wtoken); // Skip spaces i += wcsspn(&buf[i], wspace); // Check for an equal sign if (buf[i] != L'=') continue; i++; // Skip spaces after equal sign i += wcsspn(&buf[i], wspace); // eliminate leading quote, if it exists if (buf[i] == L'"') i++; // Keep the starting pos of our data r = i; // locate end of string or quote while ((buf[i] != 0) && (buf[i] != L'"')) i++; buf[i] = 0; ret = wchar_to_utf8(&buf[r]); break; } out: if (fd != NULL) fclose(fd); safe_free(wfilename); safe_free(wtoken); return ret; } // Insert entry 'data' under section 'section' of a config file // Section must include the relevant delimitors (eg '[', ']') if needed char* insert_section_data(const char* filename, const char* section, const char* data, BOOL dos2unix) { const wchar_t* outmode[] = { L"w", L"w, ccs=UTF-8", L"w, ccs=UTF-16LE" }; wchar_t *wsection = NULL, *wfilename = NULL, *wtmpname = NULL, *wdata = NULL, bom = 0; wchar_t wspace[] = L" \t"; wchar_t buf[1024]; FILE *fd_in = NULL, *fd_out = NULL; size_t i, size; int mode; char *ret = NULL, tmp[2]; if ((filename == NULL) || (section == NULL) || (data == NULL)) return NULL; if ((filename[0] == 0) || (section[0] == 0) || (data[0] == 0)) return NULL; wfilename = utf8_to_wchar(filename); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", filename); goto out; } wsection = utf8_to_wchar(section); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", section); goto out; } wdata = utf8_to_wchar(data); if (wdata == NULL) { uprintf("Could not convert '%s' to UTF-16\n", data); goto out; } fd_in = _wfopen(wfilename, L"r, ccs=UNICODE"); if (fd_in == NULL) { uprintf("Could not open file '%s'\n", filename); goto out; } // Check the input file's BOM and create an output file with the same fread(&bom, sizeof(bom), 1, fd_in); switch(bom) { case 0xFEFF: mode = 2; // UTF-16 (LE) break; case 0xBBEF: // Yeah, the UTF-8 BOM is really 0xEF,0xBB,0xBF, but mode = 1; // find me a non UTF-8 file that actually begins with "ï»" break; default: mode = 0; // ANSI break; } fseek(fd_in, 0, SEEK_SET); // uprintf("'%s' was detected as %s\n", filename, // (mode==0)?"ANSI/UTF8 (no BOM)":((mode==1)?"UTF8 (with BOM)":"UTF16 (with BOM")); wtmpname = (wchar_t*)calloc(wcslen(wfilename)+2, sizeof(wchar_t)); if (wtmpname == NULL) { uprintf("Could not allocate space for temporary output name\n"); goto out; } wcscpy(wtmpname, wfilename); wtmpname[wcslen(wtmpname)] = '~'; fd_out = _wfopen(wtmpname, outmode[mode]); if (fd_out == NULL) { uprintf("Could not open temporary output file %s~\n", filename); goto out; } // Process individual lines. NUL is always appended. while (fgetws(buf, ARRAYSIZE(buf), fd_in) != NULL) { i = 0; // Skip leading spaces i += wcsspn(&buf[i], wspace); // Our token should begin a line if (_wcsnicmp(&buf[i], wsection, wcslen(wsection)) != 0) { fputws(buf, fd_out); continue; } // Section was found, output it fputws(buf, fd_out); // Now output the new data fwprintf(fd_out, L"%s\n", wdata); ret = (char*)data; } out: if (fd_in != NULL) fclose(fd_in); if (fd_out != NULL) fclose(fd_out); // If an insertion occured, delete existing file and use the new one if (ret != NULL) { // We're in Windows text mode => Remove CRs if requested fd_in = _wfopen(wtmpname, L"rb"); fd_out = _wfopen(wfilename, L"wb"); // Don't check fds if ((fd_in != NULL) && (fd_out != NULL)) { size = (mode==2)?2:1; while(fread(tmp, size, 1, fd_in) == 1) { if ((!dos2unix) || (tmp[0] != 0x0D)) fwrite(tmp, size, 1, fd_out); } fclose(fd_in); fclose(fd_out); } else { uprintf("Could not write %s - original file has been left unmodifiedn", filename); ret = NULL; if (fd_in != NULL) fclose(fd_in); if (fd_out != NULL) fclose(fd_out); } } _wunlink(wtmpname); safe_free(wfilename); safe_free(wtmpname); safe_free(wsection); safe_free(wdata); return ret; } // Search for a specific 'src' substring the data for all occurences of 'token', and replace // if with 'rep'. File can be ANSI or UNICODE and is overwritten. Parameters are UTF-8. // The parsed line is of the form: [ ]token[ ]data // Returns a pointer to rep if replacement occured, NULL otherwise char* replace_in_token_data(const char* filename, const char* token, const char* src, const char* rep, BOOL dos2unix) { const wchar_t* outmode[] = { L"w", L"w, ccs=UTF-8", L"w, ccs=UTF-16LE" }; wchar_t *wtoken = NULL, *wfilename = NULL, *wtmpname = NULL, *wsrc = NULL, *wrep = NULL, bom = 0; wchar_t wspace[] = L" \t"; wchar_t buf[1024], *torep; FILE *fd_in = NULL, *fd_out = NULL; size_t i, size; int mode; char *ret = NULL, tmp[2]; if ((filename == NULL) || (token == NULL) || (src == NULL) || (rep == NULL)) return NULL; if ((filename[0] == 0) || (token[0] == 0) || (src[0] == 0) || (rep[0] == 0)) return NULL; if (strcmp(src, rep) == 0) // No need for processing is source is same as replacement return NULL; wfilename = utf8_to_wchar(filename); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", filename); goto out; } wtoken = utf8_to_wchar(token); if (wfilename == NULL) { uprintf("Could not convert '%s' to UTF-16\n", token); goto out; } wsrc = utf8_to_wchar(src); if (wsrc == NULL) { uprintf("Could not convert '%s' to UTF-16\n", src); goto out; } wrep = utf8_to_wchar(rep); if (wsrc == NULL) { uprintf("Could not convert '%s' to UTF-16\n", rep); goto out; } fd_in = _wfopen(wfilename, L"r, ccs=UNICODE"); if (fd_in == NULL) { uprintf("Could not open file '%s'\n", filename); goto out; } // Check the input file's BOM and create an output file with the same fread(&bom, sizeof(bom), 1, fd_in); switch(bom) { case 0xFEFF: mode = 2; // UTF-16 (LE) break; case 0xBBEF: // Yeah, the UTF-8 BOM is really 0xEF,0xBB,0xBF, but mode = 1; // find me a non UTF-8 file that actually begins with "ï»" break; default: mode = 0; // ANSI break; } fseek(fd_in, 0, SEEK_SET); // uprintf("'%s' was detected as %s\n", filename, // (mode==0)?"ANSI/UTF8 (no BOM)":((mode==1)?"UTF8 (with BOM)":"UTF16 (with BOM")); wtmpname = (wchar_t*)calloc(wcslen(wfilename)+2, sizeof(wchar_t)); if (wtmpname == NULL) { uprintf("Could not allocate space for temporary output name\n"); goto out; } wcscpy(wtmpname, wfilename); wtmpname[wcslen(wtmpname)] = '~'; fd_out = _wfopen(wtmpname, outmode[mode]); if (fd_out == NULL) { uprintf("Could not open temporary output file %s~\n", filename); goto out; } // Process individual lines. NUL is always appended. while (fgetws(buf, ARRAYSIZE(buf), fd_in) != NULL) { i = 0; // Skip leading spaces i += wcsspn(&buf[i], wspace); // Our token should begin a line if (_wcsnicmp(&buf[i], wtoken, wcslen(wtoken)) != 0) { fputws(buf, fd_out); continue; } // Token was found, move past token i += strlen(token); // Skip spaces i += wcsspn(&buf[i], wspace); torep = wcsstr(&buf[i], wsrc); if (torep == NULL) { fputws(buf, fd_out); continue; } i = (torep-buf) + wcslen(wsrc); *torep = 0; fwprintf(fd_out, L"%s%s%s", buf, wrep, &buf[i]); ret = (char*)rep; } out: if (fd_in != NULL) fclose(fd_in); if (fd_out != NULL) fclose(fd_out); // If a replacement occured, delete existing file and use the new one if (ret != NULL) { // We're in Windows text mode => Remove CRs if requested fd_in = _wfopen(wtmpname, L"rb"); fd_out = _wfopen(wfilename, L"wb"); // Don't check fds if ((fd_in != NULL) && (fd_out != NULL)) { size = (mode==2)?2:1; while(fread(tmp, size, 1, fd_in) == 1) { if ((!dos2unix) || (tmp[0] != 0x0D)) fwrite(tmp, size, 1, fd_out); } fclose(fd_in); fclose(fd_out); } else { uprintf("Could not write %s - original file has been left unmodified.\n", filename); ret = NULL; if (fd_in != NULL) fclose(fd_in); if (fd_out != NULL) fclose(fd_out); } } _wunlink(wtmpname); safe_free(wfilename); safe_free(wtmpname); safe_free(wtoken); safe_free(wsrc); safe_free(wrep); return ret; }