sample_data 컬럼에 JSON 형태의 TEXT 로 저장하고자 할 때, 기존 CSV 파일의 컬럼의 순서가 보장이 안되는 현상 발생| id | no_sql_template | label | sql_template | number | source | direct_question | natural_question | sql_query | natural_question_edited | sql_query_edited | is_valid | returned_rows | first_row |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | sql_syntax_function | SELECT {column1:from_table1} FROM {table1}; | 1 | generated-rewritten | Select the id from the addresses table. | Can you provide the first ID from the addresses table? | SELECT id FROM addresses LIMIT 1; | Can you provide the first ID from the addresses table? | SELECT id FROM addresses LIMIT 1; | TRUE | 1 | {'id': 1} |
| 1 | 2 | sql_syntax_function | SELECT {column1:from_table1} FROM {table1}; | 2 | generated-rewritten | Select the user_id from the carts table. | Can you list the first 10 user IDs from the carts? | SELECT user_id FROM carts LIMIT 10; | Can you list the first 10 user IDs from the carts? | SELECT user_id FROM carts LIMIT 10; | TRUE | 10 | {'user_id': 1} |
| 2 | 2 | sql_syntax_function | SELECT {column1:from_table1} FROM {table1}; | 3 | generated-rewritten | Select the category_name from the categories table. | Can you list all the category names we have? | SELECT category_name FROM categories; | Can you list all the category names we have? | SELECT category_name FROM categories; | TRUE | 224 | {'category_name': "Children's"} |
| 3 | 2 | sql_syntax_function | SELECT {column1:from_table1} FROM {table1}; | 4 | generated-rewritten | Select the code from the coupons table. | Can you list all the coupon codes? | SELECT code FROM coupons; | Can you list all the coupon codes? | SELECT code FROM coupons; | TRUE | 10 | {'code': 'COUPON1835'} |
// Sample Entity
...
public class Sample extends Timestamp {
...
// JSON 타입의 TEXT 로 저장
@Column(columnDefinition = "JSON")
private String sampleData;
...
// Upload 파일 요청 Service 메서드
@Transactional
public void uploadCsvFile(MultipartFile file, DatasetMetadataDto metadata) throws Exception {
String datasetName = metadata.getDatasetName();
String datasetDescription = metadata.getDatasetDescription();
// Controller 에서 받아온 file 을 읽으면서 처리
try (CSVReader csvReader = new CSVReader(new InputStreamReader(file.getInputStream()))) {
String[] columns = csvReader.readNext();
if (columns == null) throw new FileProcessingException("CSV file is empty");
Map<String, Integer> columnIndexMap = new HashMap<>();
for (int i = 0; i < columns.length; i++) {
for (DatasetColumn column : DatasetColumn.values()) {
if (columns[i].equalsIgnoreCase(column.toString())) {
columnIndexMap.put(column.toString(), i);
}
}
}
...
// 파일에서 읽어온 데이터를 Sample 엔터티로 만든후 저장
while ((nextRecord = csvReader.readNext()) != null) {
JsonObject sampleData = new JsonObject();
for (int i = 0; i < columns.length; i++) {
sampleData.addProperty(columns[i], nextRecord[i]);
}
Sample sample = Sample.builder()
.id(UUID.randomUUID().toString())
.datasetName(datasetName)
.datasetDescription(datasetDescription)
.versionId(1L)
.status(SampleStatus.CREATED)
.sampleData(sampleData.toString())
.build();
sampleList.add(sample);
...
}
}
sampleRepository.saveAll(sampleList);
} catch (Exception e) {
throw new FileUploadException(e.getMessage(), e);
}
}

id, label, number, source, is_valid, first_row, sql_query, sql_template, returned_rows, direct_question, no_sql_template, natural_question, sql_query_edited, natural_question_edited
sample_data 에 저장되는 CSV 로우 데이터가 매번 업로드 시 컬럼 순서가 섞여버린 상태
→ 해당 값을 기준으로 다운로드 될 때 해당 섞인 컬럼 순서에 맞춰서 순서대로 다운로드가 되므로**, Upload / Download 컬럼 순서가 달라지는 현상**
// Sample Entity
...
public class Sample extends Timestamp {
...
// JSON 타입의 TEXT 로 저장
@Column(columnDefinition = "LONGTEXT")
private String sampleData;
...
